From 3a4652613d0d3c6c8adae1bf7937deecf888d6f3 Mon Sep 17 00:00:00 2001 From: charlie sievers Date: Mon, 13 Jul 2020 19:43:24 -0700 Subject: [PATCH 0001/1471] Add folded option, change ballistico to eskm, add post force modifications --- src/USER-PHONON/dynamical_matrix.cpp | 79 +++++++++++++---- src/USER-PHONON/dynamical_matrix.h | 2 + src/USER-PHONON/third_order.cpp | 126 ++++++++++++++++++++------- src/USER-PHONON/third_order.h | 2 + 4 files changed, 157 insertions(+), 52 deletions(-) diff --git a/src/USER-PHONON/dynamical_matrix.cpp b/src/USER-PHONON/dynamical_matrix.cpp index 1495219124..ced290f6fe 100644 --- a/src/USER-PHONON/dynamical_matrix.cpp +++ b/src/USER-PHONON/dynamical_matrix.cpp @@ -125,6 +125,7 @@ void DynamicalMatrix::command(int narg, char **arg) compressed = 0; file_flag = 0; file_opened = 0; + folded = 0; conversion = 1; // read options from end of input line @@ -132,6 +133,9 @@ void DynamicalMatrix::command(int narg, char **arg) else if (style == ESKM) options(narg-3,&arg[3]); //COME BACK else if (comm->me == 0 && screen) fprintf(screen,"Illegal Dynamical Matrix command\n"); + if (!folded) dynlenb = dynlen; + if (folded) dynlenb = (atom->natoms)*3; + if (atom->map_style == 0) error->all(FLERR,"Dynamical_matrix command requires an atom map, see atom_modify"); @@ -184,6 +188,16 @@ void DynamicalMatrix::options(int narg, char **arg) filename = arg[iarg + 1]; file_flag = 1; iarg += 2; + } else if (strcmp(arg[iarg],"fold") == 0) { + if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); + if (strcmp(arg[iarg+1],"yes") == 0) { + folded = 1; + } + else if (strcmp(arg[iarg+1],"no") == 0) { + folded = 0; + } + else error->all(FLERR,"Illegal input for dynamical_matrix fold option"); + iarg += 2; } else error->all(FLERR,"Illegal dynamical_matrix command"); } if (file_flag == 1) { @@ -244,11 +258,11 @@ void DynamicalMatrix::calculateMatrix() double **dynmat = new double*[3]; for (int i=0; i<3; i++) - dynmat[i] = new double[dynlen]; + 
dynmat[i] = new double[dynlenb]; double **fdynmat = new double*[3]; for (int i=0; i<3; i++) - fdynmat[i] = new double[dynlen]; + fdynmat[i] = new double[dynlenb]; //initialize dynmat to all zeros dynmat_clear(dynmat); @@ -257,7 +271,7 @@ void DynamicalMatrix::calculateMatrix() fprintf(screen,"Calculating Dynamical Matrix ...\n"); fprintf(screen," Total # of atoms = " BIGINT_FORMAT "\n", natoms); fprintf(screen," Atoms in group = " BIGINT_FORMAT "\n", gcount); - fprintf(screen," Total dynamical matrix elements = " BIGINT_FORMAT "\n", (dynlen*dynlen) ); + fprintf(screen," Total dynamical matrix elements = " BIGINT_FORMAT "\n", (dynlenb*dynlen) ); } // emit dynlen rows of dimalpha*dynlen*dimbeta elements @@ -274,9 +288,16 @@ void DynamicalMatrix::calculateMatrix() for (bigint j=1; j<=natoms; j++){ local_jdx = atom->map(j); if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal - && gm[j-1] >= 0){ - for (int beta=0; beta<3; beta++){ - dynmat[alpha][gm[j-1]*3+beta] -= f[local_jdx][beta]; + && (gm[j-1] >= 0 || folded)){ + if (folded) { + for (int beta=0; beta<3; beta++){ + dynmat[alpha][(j-1)*3+beta] -= f[local_jdx][beta]; + } + } + else { + for (int beta=0; beta<3; beta++){ + dynmat[alpha][gm[j-1]*3+beta] -= f[local_jdx][beta]; + } } } } @@ -285,22 +306,35 @@ void DynamicalMatrix::calculateMatrix() for (bigint j=1; j<=natoms; j++){ local_jdx = atom->map(j); if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal - && gm[j-1] >= 0){ - for (int beta=0; beta<3; beta++){ - if (atom->rmass_flag == 1) - imass = sqrt(m[local_idx] * m[local_jdx]); - else - imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); - dynmat[alpha][gm[j-1]*3+beta] -= -f[local_jdx][beta]; - dynmat[alpha][gm[j-1]*3+beta] /= (2 * del * imass); - dynmat[alpha][gm[j-1]*3+beta] *= conversion; + && (gm[j-1] >= 0 || folded)){ + if (folded){ + for (int beta=0; beta<3; beta++){ + if (atom->rmass_flag == 1) + imass = sqrt(m[local_idx] * m[local_jdx]); + else + imass = sqrt(m[type[local_idx]] * 
m[type[local_jdx]]); + dynmat[alpha][(j-1)*3+beta] -= -f[local_jdx][beta]; + dynmat[alpha][(j-1)*3+beta] /= (2 * del * imass); + dynmat[alpha][(j-1)*3+beta] *= conversion; + } + } + else{ + for (int beta=0; beta<3; beta++){ + if (atom->rmass_flag == 1) + imass = sqrt(m[local_idx] * m[local_jdx]); + else + imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); + dynmat[alpha][gm[j-1]*3+beta] -= -f[local_jdx][beta]; + dynmat[alpha][gm[j-1]*3+beta] /= (2 * del * imass); + dynmat[alpha][gm[j-1]*3+beta] *= conversion; + } } } } displace_atom(local_idx,alpha,1); } for (int k=0; k<3; k++) - MPI_Reduce(dynmat[k],fdynmat[k],dynlen,MPI_DOUBLE,MPI_SUM,0,world); + MPI_Reduce(dynmat[k],fdynmat[k],dynlenb,MPI_DOUBLE,MPI_SUM,0,world); if (me == 0) writeMatrix(fdynmat); dynmat_clear(dynmat); @@ -338,12 +372,12 @@ void DynamicalMatrix::writeMatrix(double **dynmat) clearerr(fp); if (binaryflag) { for (int i=0; i<3; i++) - fwrite(dynmat[i], sizeof(double), dynlen, fp); + fwrite(dynmat[i], sizeof(double), dynlenb, fp); if (ferror(fp)) error->one(FLERR, "Error writing to binary file"); } else { for (int i = 0; i < 3; i++) { - for (bigint j = 0; j < dynlen; j++) { + for (bigint j = 0; j < dynlenb; j++) { if ((j+1)%3==0) fprintf(fp, "%4.8f\n", dynmat[i][j]); else fprintf(fp, "%4.8f ",dynmat[i][j]); } @@ -385,6 +419,7 @@ void DynamicalMatrix::displace_atom(int local_idx, int direction, int magnitude) void DynamicalMatrix::update_force() { force_clear(); + int n_post_force = modify->n_post_force; if (pair_compute_flag) { force->pair->compute(eflag,vflag); @@ -405,6 +440,12 @@ void DynamicalMatrix::update_force() comm->reverse_comm(); timer->stamp(Timer::COMM); } + + // force modifications + + if (n_post_force) modify->post_force(vflag); + timer->stamp(Timer::MODIFY); + ++ update->nsteps; } @@ -435,7 +476,7 @@ void DynamicalMatrix::force_clear() void DynamicalMatrix::dynmat_clear(double **dynmat) { - size_t nbytes = sizeof(double) * dynlen; + size_t nbytes = sizeof(double) * dynlenb; if 
(nbytes) { for (int i=0; i<3; i++) diff --git a/src/USER-PHONON/dynamical_matrix.h b/src/USER-PHONON/dynamical_matrix.h index 8ff11044ea..02b52defb4 100644 --- a/src/USER-PHONON/dynamical_matrix.h +++ b/src/USER-PHONON/dynamical_matrix.h @@ -56,6 +56,7 @@ namespace LAMMPS_NS { int igroup,groupbit; bigint gcount; // number of atoms in group bigint dynlen; // rank of dynamical matrix + bigint dynlenb; // new dynlen if folded int scaleflag; int me; bigint *groupmap; @@ -64,6 +65,7 @@ namespace LAMMPS_NS { int binaryflag; // 1 if dump file is written binary, 0 no int file_opened; // 1 if openfile method has been called, 0 no int file_flag; // 1 custom file name, 0 dynmat.dat + int folded; // 1 folded, 0 nonfolded FILE *fp; }; diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 7764287337..bc5d766ba6 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -29,7 +29,7 @@ using namespace LAMMPS_NS; using namespace MathSpecial; -enum{REGULAR,BALLISTICO}; +enum{REGULAR,ESKM}; /* ---------------------------------------------------------------------- */ @@ -116,7 +116,7 @@ void ThirdOrder::command(int narg, char **arg) int style = -1; if (strcmp(arg[1],"regular") == 0) style = REGULAR; - else if (strcmp(arg[1],"eskm") == 0) style = BALLISTICO; + else if (strcmp(arg[1],"eskm") == 0) style = ESKM; else error->all(FLERR,"Illegal Dynamical Matrix command"); // set option defaults @@ -127,13 +127,17 @@ void ThirdOrder::command(int narg, char **arg) file_flag = 0; file_opened = 0; conversion = 1; + folded = 0; // read options from end of input line if (style == REGULAR) options(narg-3,&arg[3]); //COME BACK - else if (style == BALLISTICO) options(narg-3,&arg[3]); //COME BACK + else if (style == ESKM) options(narg-3,&arg[3]); //COME BACK else if (comm->me == 0 && screen) fprintf(screen,"Illegal Dynamical Matrix command\n"); del = force->numeric(FLERR, arg[2]); + if (!folded) dynlenb = dynlen; + if (folded) dynlenb = 
(atom->natoms)*3; + if (atom->map_style == 0) error->all(FLERR,"third_order command requires an atom map, see atom_modify"); @@ -147,7 +151,7 @@ void ThirdOrder::command(int narg, char **arg) timer->barrier_stop(); } - if (style == BALLISTICO) { + if (style == ESKM) { setup(); convert_units(update->unit_style); conversion = conv_energy/conv_distance/conv_distance; @@ -187,6 +191,16 @@ void ThirdOrder::options(int narg, char **arg) binaryflag = 1; } iarg += 2; + } else if (strcmp(arg[iarg],"fold") == 0) { + if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); + if (strcmp(arg[iarg+1],"yes") == 0) { + folded = 1; + } + else if (strcmp(arg[iarg+1],"no") == 0) { + folded = 0; + } + else error->all(FLERR,"Illegal input for dynamical_matrix fold option"); + iarg += 2; } else error->all(FLERR,"Illegal third_order command"); } if (file_flag == 1 and me == 0) { @@ -242,23 +256,25 @@ void ThirdOrder::calculateMatrix() bigint *gm = groupmap; double **f = atom->f; - double *dynmat = new double[3*dynlen]; - double *fdynmat = new double[3*dynlen]; - memset(&dynmat[0],0,dynlen*sizeof(double)); - memset(&fdynmat[0],0,dynlen*sizeof(double)); + double *dynmat = new double[dynlenb]; + double *fdynmat = new double[dynlenb]; + memset(&dynmat[0],0,dynlenb*sizeof(double)); + memset(&fdynmat[0],0,dynlenb*sizeof(double)); if (comm->me == 0 && screen) { fprintf(screen,"Calculating Third Order ...\n"); fprintf(screen," Total # of atoms = " BIGINT_FORMAT "\n", natoms); fprintf(screen," Atoms in group = " BIGINT_FORMAT "\n", gcount); fprintf(screen," Total third order elements = " - BIGINT_FORMAT "\n", (dynlen*dynlen*dynlen) ); + BIGINT_FORMAT "\n", (dynlen*dynlenb*dynlenb) ); } update->nsteps = 0; int prog = 0; for (bigint i=1; i<=natoms; i++){ local_idx = atom->map(i); + if (gm[i-1] < 0) + continue; for (int alpha=0; alpha<3; alpha++){ for (bigint j=1; j<=natoms; j++){ local_jdx = atom->map(j); @@ -270,9 +286,13 @@ void ThirdOrder::calculateMatrix() local_kdx = 
atom->map(k); for (int gamma=0; gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && gm[i-1] >= 0 && gm[j-1] >= 0 && gm[k-1] >= 0 + && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { - dynmat[gm[k-1]*3+gamma] += f[local_kdx][gamma]; + if (folded) { + dynmat[(k-1)*3+gamma] += f[local_kdx][gamma]; + } else { + dynmat[gm[k-1]*3+gamma] += f[local_kdx][gamma]; + } } } } @@ -282,9 +302,13 @@ void ThirdOrder::calculateMatrix() local_kdx = atom->map(k); for (int gamma=0; gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && gm[i-1] >= 0 && gm[j-1] >= 0 && gm[k-1] >= 0 + && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { - dynmat[gm[k-1]*3+gamma] -= f[local_kdx][gamma]; + if (folded) { + dynmat[(k-1)*3+gamma] -= f[local_kdx][gamma]; + } else { + dynmat[gm[k-1]*3+gamma] -= f[local_kdx][gamma]; + } } } } @@ -296,9 +320,13 @@ void ThirdOrder::calculateMatrix() local_kdx = atom->map(k); for (int gamma=0; gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && gm[i-1] >= 0 && gm[j-1] >= 0 && gm[k-1] >= 0 + && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { - dynmat[gm[k-1]*3+gamma] -= f[local_kdx][gamma]; + if (folded) { + dynmat[(k-1)*3+gamma] -= f[local_kdx][gamma]; + } else { + dynmat[gm[k-1]*3+gamma] -= f[local_kdx][gamma]; + } } } } @@ -308,20 +336,29 @@ void ThirdOrder::calculateMatrix() local_kdx = atom->map(k); for (int gamma=0; gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && gm[i-1] >= 0 && gm[j-1] >= 0 && gm[k-1] >= 0 + && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { - dynmat[gm[k-1]*3+gamma] += f[local_kdx][gamma]; - dynmat[gm[k-1]*3+gamma] /= (4 * del * del); + if (folded) { + dynmat[(k-1)*3+gamma] += f[local_kdx][gamma]; + dynmat[(k-1)*3+gamma] /= (4 * del * del); + } else { + dynmat[gm[k-1]*3+gamma] += f[local_kdx][gamma]; + dynmat[gm[k-1]*3+gamma] /= (4 * del * del); + } } } } 
displace_atom(local_jdx, beta, 1); displace_atom(local_idx, alpha, 1); - MPI_Reduce(dynmat,fdynmat,3*dynlen,MPI_DOUBLE,MPI_SUM,0,world); + MPI_Reduce(dynmat,fdynmat,dynlenb,MPI_DOUBLE,MPI_SUM,0,world); if (me == 0){ - writeMatrix(fdynmat, gm[i-1], alpha, gm[j-1], beta); + if (folded) { + writeMatrix(fdynmat, gm[i-1], alpha, (j-1), beta); + } else { + writeMatrix(fdynmat, gm[i-1], alpha, gm[j-1], beta); + } } - memset(&dynmat[0],0,dynlen*sizeof(double)); + memset(&dynmat[0],0,dynlenb*sizeof(double)); } } } @@ -354,18 +391,34 @@ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) double norm; if (!binaryflag && fp) { clearerr(fp); - for (int k = 0; k < gcount; k++){ - norm = square(dynmat[k*3])+ - square(dynmat[k*3+1])+ - square(dynmat[k*3+2]); - if (norm > 1.0e-16) - fprintf(fp, - BIGINT_FORMAT " %d " BIGINT_FORMAT " %d " BIGINT_FORMAT - " %7.8f %7.8f %7.8f\n", - i+1, a + 1, j+1, b + 1, groupmap[k]+1, - dynmat[k*3] * conversion, - dynmat[k*3+1] * conversion, - dynmat[k*3+2] * conversion); + if (folded){ + for (int k = 0; k < atom->natoms; k++){ + norm = square(dynmat[k*3])+ + square(dynmat[k*3+1])+ + square(dynmat[k*3+2]); + if (norm > 1.0e-16) + fprintf(fp, + BIGINT_FORMAT " %d " BIGINT_FORMAT " %d %d" + " %7.8f %7.8f %7.8f\n", + i+1, a + 1, j+1, b + 1, k+1, + dynmat[k*3] * conversion, + dynmat[k*3+1] * conversion, + dynmat[k*3+2] * conversion); + } + } else { + for (int k = 0; k < gcount; k++){ + norm = square(dynmat[k*3])+ + square(dynmat[k*3+1])+ + square(dynmat[k*3+2]); + if (norm > 1.0e-16) + fprintf(fp, + BIGINT_FORMAT " %d " BIGINT_FORMAT " %d " BIGINT_FORMAT + " %7.8f %7.8f %7.8f\n", + i+1, a + 1, j+1, b + 1, groupmap[k]+1, + dynmat[k*3] * conversion, + dynmat[k*3+1] * conversion, + dynmat[k*3+2] * conversion); + } } } else if (binaryflag && fp){ clearerr(fp); @@ -406,6 +459,7 @@ void ThirdOrder::displace_atom(int local_idx, int direction, int magnitude) void ThirdOrder::update_force() { force_clear(); + int n_post_force = 
modify->n_post_force; if (pair_compute_flag) { force->pair->compute(eflag,vflag); @@ -426,6 +480,12 @@ void ThirdOrder::update_force() comm->reverse_comm(); timer->stamp(Timer::COMM); } + + // force modifications + + if (n_post_force) modify->post_force(vflag); + timer->stamp(Timer::MODIFY); + ++ update->nsteps; } diff --git a/src/USER-PHONON/third_order.h b/src/USER-PHONON/third_order.h index 83062b6b1f..7bd80840f5 100644 --- a/src/USER-PHONON/third_order.h +++ b/src/USER-PHONON/third_order.h @@ -56,6 +56,7 @@ namespace LAMMPS_NS { double del; int igroup,groupbit; bigint dynlen; + bigint dynlenb; int scaleflag; int me; bigint gcount; // number of atoms in group @@ -65,6 +66,7 @@ namespace LAMMPS_NS { int binaryflag; // 1 if dump file is written binary, 0 no int file_opened; // 1 if openfile method has been called, 0 no int file_flag; // 1 custom file name, 0 dynmat.dat + int folded; // 1 if system is folded, 0 no FILE *fp; }; From 4b656b39612a2fecb238778f11c3ea3960f7384d Mon Sep 17 00:00:00 2001 From: Sievers Date: Tue, 14 Jul 2020 15:47:14 -0700 Subject: [PATCH 0002/1471] Check if atom is part of group before computing forces --- src/USER-PHONON/third_order.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index bc5d766ba6..0f5fc08bc5 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -20,6 +20,7 @@ #include "kspace.h" #include "update.h" #include "neighbor.h" +#include "modify.h" #include "pair.h" #include "timer.h" #include "finish.h" @@ -278,6 +279,8 @@ void ThirdOrder::calculateMatrix() for (int alpha=0; alpha<3; alpha++){ for (bigint j=1; j<=natoms; j++){ local_jdx = atom->map(j); + if (gm[j-1] < 0 && !folded) + continue; for (int beta=0; beta<3; beta++){ displace_atom(local_idx, alpha, 1); displace_atom(local_jdx, beta, 1); From 999dd13924d386bbe6e9727ac1f16f95dac42215 Mon Sep 17 00:00:00 2001 From: Sievers Date: Wed, 15 Jul 2020 12:45:46 -0700 
Subject: [PATCH 0003/1471] Draft of force calculation reduction through neighbor lists --- src/USER-PHONON/third_order.cpp | 131 +++++++++++++++++++++++++++++--- src/USER-PHONON/third_order.h | 5 ++ 2 files changed, 126 insertions(+), 10 deletions(-) diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 0f5fc08bc5..2e85b37bf2 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -20,6 +20,8 @@ #include "kspace.h" #include "update.h" #include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" #include "modify.h" #include "pair.h" #include "timer.h" @@ -46,6 +48,8 @@ ThirdOrder::~ThirdOrder() if (fp && me == 0) fclose(fp); fp = NULL; memory->destroy(groupmap); + memory->destroy(ijnum); + memory->destroy(neighbortags); } /* ---------------------------------------------------------------------- @@ -71,6 +75,10 @@ void ThirdOrder::setup() domain->box_too_small_check(); neighbor->build(1); + // build neighbor list this command needs based on earlier request + + neighbor->build_one(list); + // compute all forces external_force_clear = 0; eflag=0; @@ -94,7 +102,18 @@ void ThirdOrder::command(int narg, char **arg) error->all(FLERR,"third_order command before simulation box is defined"); if (narg < 2) error->all(FLERR,"Illegal third_order command"); + // request a full neighbor list for use by this command + + int irequest = neighbor->request(this); + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->command = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->occasional = 1; + neighbor->requests[irequest]->command_style = "third_order"; + lmp->init(); + list = neighbor->lists[irequest]; // orthogonal vs triclinic simulation box @@ -130,6 +149,10 @@ void ThirdOrder::command(int narg, char **arg) conversion = 1; folded = 0; + // set Neigborlist attributes to NULL + ijnum = NULL; + neighbortags = NULL; + // read 
options from end of input line if (style == REGULAR) options(narg-3,&arg[3]); //COME BACK else if (style == ESKM) options(narg-3,&arg[3]); //COME BACK @@ -256,12 +279,17 @@ void ThirdOrder::calculateMatrix() bigint natoms = atom->natoms; bigint *gm = groupmap; double **f = atom->f; + int inum; + bigint j; + bigint *firstneigh; double *dynmat = new double[dynlenb]; double *fdynmat = new double[dynlenb]; memset(&dynmat[0],0,dynlenb*sizeof(double)); memset(&fdynmat[0],0,dynlenb*sizeof(double)); + getNeighbortags(); + if (comm->me == 0 && screen) { fprintf(screen,"Calculating Third Order ...\n"); fprintf(screen," Total # of atoms = " BIGINT_FORMAT "\n", natoms); @@ -273,14 +301,17 @@ void ThirdOrder::calculateMatrix() update->nsteps = 0; int prog = 0; for (bigint i=1; i<=natoms; i++){ - local_idx = atom->map(i); if (gm[i-1] < 0) continue; + inum = ijnum[i-1]; + firstneigh = neighbortags[i-1]; + local_idx = atom->map(i); for (int alpha=0; alpha<3; alpha++){ - for (bigint j=1; j<=natoms; j++){ - local_jdx = atom->map(j); - if (gm[j-1] < 0 && !folded) + for (int jj=0; jjmap(j+1); for (int beta=0; beta<3; beta++){ displace_atom(local_idx, alpha, 1); displace_atom(local_jdx, beta, 1); @@ -289,7 +320,7 @@ void ThirdOrder::calculateMatrix() local_kdx = atom->map(k); for (int gamma=0; gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) + && ((gm[j] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { if (folded) { dynmat[(k-1)*3+gamma] += f[local_kdx][gamma]; @@ -305,7 +336,7 @@ void ThirdOrder::calculateMatrix() local_kdx = atom->map(k); for (int gamma=0; gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) + && ((gm[j] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { if (folded) { dynmat[(k-1)*3+gamma] -= f[local_kdx][gamma]; @@ -323,7 +354,7 @@ void ThirdOrder::calculateMatrix() local_kdx = atom->map(k); for (int gamma=0; 
gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) + && ((gm[j] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { if (folded) { dynmat[(k-1)*3+gamma] -= f[local_kdx][gamma]; @@ -339,7 +370,7 @@ void ThirdOrder::calculateMatrix() local_kdx = atom->map(k); for (int gamma=0; gamma<3; gamma++){ if (local_idx >= 0 && local_jdx >= 0 && local_kdx >= 0 - && ((gm[j-1] >= 0 && gm[k-1] >= 0) || folded) + && ((gm[j] >= 0 && gm[k-1] >= 0) || folded) && local_kdx < nlocal) { if (folded) { dynmat[(k-1)*3+gamma] += f[local_kdx][gamma]; @@ -356,9 +387,9 @@ void ThirdOrder::calculateMatrix() MPI_Reduce(dynmat,fdynmat,dynlenb,MPI_DOUBLE,MPI_SUM,0,world); if (me == 0){ if (folded) { - writeMatrix(fdynmat, gm[i-1], alpha, (j-1), beta); + writeMatrix(fdynmat, gm[i-1], alpha, j, beta); } else { - writeMatrix(fdynmat, gm[i-1], alpha, gm[j-1], beta); + writeMatrix(fdynmat, gm[i-1], alpha, gm[j], beta); } } memset(&dynmat[0],0,dynlenb*sizeof(double)); @@ -636,3 +667,83 @@ void ThirdOrder::create_groupmap() delete[] sub_groupmap; delete[] temp_groupmap; } + +void ThirdOrder::getNeighbortags() { + bigint natoms = atom->natoms; + int *ilist,*jlist,*numneigh,**firstneigh; + int ii,jj,inum,jnum,sum; + int *temptags = (int*) malloc(natoms*sizeof(int)); + int *ijnumproc = (int*) malloc(natoms*sizeof(int)); + memory->create(ijnum, natoms, "thirdorder:ijnum"); + bigint **firsttags; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + memset(&ijnumproc[0],0,natoms*sizeof(int)); + for (ii = 0; ii < inum; ii++) { + //fprintf(screen, "i: %i on rank %i\n",atom->tag[ilist[ii] & NEIGHMASK], comm->me); + sum = 0; + memset(&temptags[0],0,natoms*sizeof(int)); + jnum = numneigh[ii]; + jlist = firstneigh[ii]; + temptags[atom->tag[ilist[ii] & NEIGHMASK]-1] = 1; + for (jj = 0; jj < jnum; jj++) { + // fprintf(screen, "i: %i and j: %i on rank %i\n",atom->tag[ilist[ii] & NEIGHMASK], 
atom->tag[jlist[jj] & NEIGHMASK],comm->me); + temptags[atom->tag[jlist[jj] & NEIGHMASK]-1] = 1; + } + for (bigint i=0; i<=natoms-1; i++) { + sum += temptags[i]; + } + ijnumproc[atom->tag[ilist[ii] & NEIGHMASK]-1] = sum; + // fprintf(screen, "tag %i sum %i on rank %i\n",atom->tag[ilist[ii] & NEIGHMASK], sum, comm->me); + } + MPI_Allreduce(ijnumproc,ijnum,natoms,MPI_INT,MPI_SUM,world); + sum = 0; + for (bigint i=0; i<=natoms-1; i++) { + sum += ijnum[i]; + } + + bigint nbytes = ((bigint) sizeof(bigint)) * sum; + bigint *data = (bigint *) memory->smalloc(nbytes, "thirdorder:firsttags"); + bigint *datarecv = (bigint *) memory->smalloc(nbytes, "thirdorder:neighbortags"); + nbytes = ((bigint) sizeof(bigint *)) * natoms; + firsttags = (bigint **) memory->smalloc(nbytes, "thirdorder:firsttags"); + neighbortags = (bigint **) memory->smalloc(nbytes, "thirdorder:neighbortags"); + memset(&data[0],0,sum*sizeof(bigint)); + memset(&datarecv[0],0,sum*sizeof(bigint)); + + bigint n = 0; + for (bigint i = 0; i < natoms; i++) { + firsttags[i] = &data[n]; + neighbortags[i] = &datarecv[n]; + n += ijnum[i]; + } + + for (ii = 0; ii < inum; ii++) { + int m = 0; + memset(&temptags[0],0,natoms*sizeof(int)); + jnum = numneigh[ii]; + jlist = firstneigh[ii]; + temptags[atom->tag[ilist[ii] & NEIGHMASK]-1] = 1; + for (jj = 0; jj < jnum; jj++) { + temptags[atom->tag[jlist[jj] & NEIGHMASK]-1] = 1; + } + for (int j=0; j < natoms; j++) { + if (temptags[j] == 1) { + firsttags[atom->tag[ilist[ii] & NEIGHMASK]-1][m] = j; + m += 1; + } + } + } + MPI_Allreduce(data,datarecv,sum,MPI_INT,MPI_SUM,world); + // for (bigint i=0; i < natoms; i++) { + // jnum = ijnum[i]; + // for (int j=0; j < jnum; j++) { + // fprintf(screen, "i: %lli and j: %i bool: %lli on rank %i\n", i, j, neighbortags[i][j], comm->me); + // } + // } + free (ijnumproc); + free (temptags); +} \ No newline at end of file diff --git a/src/USER-PHONON/third_order.h b/src/USER-PHONON/third_order.h index 7bd80840f5..3c39f912d6 100644 --- 
a/src/USER-PHONON/third_order.h +++ b/src/USER-PHONON/third_order.h @@ -48,6 +48,7 @@ namespace LAMMPS_NS { void convert_units(const char *style); void displace_atom(int local_idx, int direction, int magnitude); void writeMatrix(double *, bigint, int, bigint, int); + void getNeighbortags(); double conversion; double conv_energy; @@ -68,6 +69,10 @@ namespace LAMMPS_NS { int file_flag; // 1 custom file name, 0 dynmat.dat int folded; // 1 if system is folded, 0 no + class NeighList *list; + int *ijnum; + bigint **neighbortags; + FILE *fp; }; } From 7133311d2da0178930951ae4b9f7711946454678 Mon Sep 17 00:00:00 2001 From: Sievers Date: Thu, 16 Jul 2020 14:13:45 -0700 Subject: [PATCH 0004/1471] Change Allreduce to fit bigint --- src/USER-PHONON/third_order.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 2e85b37bf2..b33dc73d88 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -737,13 +737,18 @@ void ThirdOrder::getNeighbortags() { } } } - MPI_Allreduce(data,datarecv,sum,MPI_INT,MPI_SUM,world); - // for (bigint i=0; i < natoms; i++) { - // jnum = ijnum[i]; - // for (int j=0; j < jnum; j++) { - // fprintf(screen, "i: %lli and j: %i bool: %lli on rank %i\n", i, j, neighbortags[i][j], comm->me); - // } - // } + MPI_Allreduce(data,datarecv,sum,MPI_LONG_LONG_INT,MPI_SUM,world); + for (bigint i=0; i < natoms; i++) { + jnum = ijnum[i]; + if (comm->me==0) + fprintf(screen, "i: %lli : ", i); + for (int j=0; j < jnum; j++) { + if (comm->me==0) + fprintf(screen, "%lli ",neighbortags[i][j]); + } + if (comm->me==0) + fprintf(screen, "\n"); + } free (ijnumproc); free (temptags); } \ No newline at end of file From dd6e5df3563550abfb019cf1981bfc42089eac10 Mon Sep 17 00:00:00 2001 From: Sievers Date: Thu, 16 Jul 2020 14:16:12 -0700 Subject: [PATCH 0005/1471] Remove print statement --- src/USER-PHONON/third_order.cpp | 12 +----------- 1 file 
changed, 1 insertion(+), 11 deletions(-) diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index b33dc73d88..54d8ad4f3e 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -738,17 +738,7 @@ void ThirdOrder::getNeighbortags() { } } MPI_Allreduce(data,datarecv,sum,MPI_LONG_LONG_INT,MPI_SUM,world); - for (bigint i=0; i < natoms; i++) { - jnum = ijnum[i]; - if (comm->me==0) - fprintf(screen, "i: %lli : ", i); - for (int j=0; j < jnum; j++) { - if (comm->me==0) - fprintf(screen, "%lli ",neighbortags[i][j]); - } - if (comm->me==0) - fprintf(screen, "\n"); - } + free (ijnumproc); free (temptags); } \ No newline at end of file From 9011cfaa96da87c40f06ee6b20c0cadebbe24fb5 Mon Sep 17 00:00:00 2001 From: Sievers Date: Sat, 18 Jul 2020 02:27:31 -0700 Subject: [PATCH 0006/1471] Added neighbors of neighbors list indexed by tag and return tags. --- src/USER-PHONON/third_order.cpp | 89 ++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 13 deletions(-) diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 54d8ad4f3e..635d2be512 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -48,8 +48,8 @@ ThirdOrder::~ThirdOrder() if (fp && me == 0) fclose(fp); fp = NULL; memory->destroy(groupmap); - memory->destroy(ijnum); - memory->destroy(neighbortags); + // memory->destroy(ijnum); + // memory->destroy(neighbortags); } /* ---------------------------------------------------------------------- @@ -668,10 +668,13 @@ void ThirdOrder::create_groupmap() delete[] temp_groupmap; } +/* ---------------------------------------------------------------------- */ + void ThirdOrder::getNeighbortags() { bigint natoms = atom->natoms; int *ilist,*jlist,*numneigh,**firstneigh; - int ii,jj,inum,jnum,sum; + bigint *Jlist,*klist; + int ii,jj,kk,inum,jnum,knum,sum; int *temptags = (int*) malloc(natoms*sizeof(int)); int *ijnumproc = (int*) malloc(natoms*sizeof(int)); 
memory->create(ijnum, natoms, "thirdorder:ijnum"); @@ -681,28 +684,26 @@ void ThirdOrder::getNeighbortags() { ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - memset(&ijnumproc[0],0,natoms*sizeof(int)); + memset(&ijnum[0],0,natoms*sizeof(int)); for (ii = 0; ii < inum; ii++) { - //fprintf(screen, "i: %i on rank %i\n",atom->tag[ilist[ii] & NEIGHMASK], comm->me); sum = 0; memset(&temptags[0],0,natoms*sizeof(int)); jnum = numneigh[ii]; jlist = firstneigh[ii]; temptags[atom->tag[ilist[ii] & NEIGHMASK]-1] = 1; for (jj = 0; jj < jnum; jj++) { - // fprintf(screen, "i: %i and j: %i on rank %i\n",atom->tag[ilist[ii] & NEIGHMASK], atom->tag[jlist[jj] & NEIGHMASK],comm->me); temptags[atom->tag[jlist[jj] & NEIGHMASK]-1] = 1; } for (bigint i=0; i<=natoms-1; i++) { sum += temptags[i]; } - ijnumproc[atom->tag[ilist[ii] & NEIGHMASK]-1] = sum; - // fprintf(screen, "tag %i sum %i on rank %i\n",atom->tag[ilist[ii] & NEIGHMASK], sum, comm->me); + ijnum[atom->tag[ilist[ii] & NEIGHMASK]-1] = sum; } - MPI_Allreduce(ijnumproc,ijnum,natoms,MPI_INT,MPI_SUM,world); + MPI_Allreduce(ijnum,ijnumproc,natoms,MPI_INT,MPI_SUM,world); + memset(&ijnum[0],0,natoms*sizeof(int)); sum = 0; for (bigint i=0; i<=natoms-1; i++) { - sum += ijnum[i]; + sum += ijnumproc[i]; } bigint nbytes = ((bigint) sizeof(bigint)) * sum; @@ -718,7 +719,7 @@ void ThirdOrder::getNeighbortags() { for (bigint i = 0; i < natoms; i++) { firsttags[i] = &data[n]; neighbortags[i] = &datarecv[n]; - n += ijnum[i]; + n += ijnumproc[i]; } for (ii = 0; ii < inum; ii++) { @@ -732,13 +733,75 @@ void ThirdOrder::getNeighbortags() { } for (int j=0; j < natoms; j++) { if (temptags[j] == 1) { - firsttags[atom->tag[ilist[ii] & NEIGHMASK]-1][m] = j; + neighbortags[atom->tag[ilist[ii] & NEIGHMASK]-1][m] = j; m += 1; } } } - MPI_Allreduce(data,datarecv,sum,MPI_LONG_LONG_INT,MPI_SUM,world); + MPI_Allreduce(datarecv,data,sum,MPI_LONG_LONG_INT,MPI_SUM,world); + for (bigint i = 0; i < natoms; i++) { + ijnum[i] = 0; + sum 
= 0; + memset(&temptags[0],0,natoms*sizeof(int)); + jnum = ijnumproc[i]; + Jlist = firsttags[i]; + temptags[i] = 1; + for (jj = 0; jj < jnum; jj++) { + temptags[Jlist[jj]] = 1; + klist = firsttags[Jlist[jj]]; + knum = ijnumproc[Jlist[jj]]; + for (kk = 0; kk < knum; kk++) { + temptags[klist[kk]] = 1; + } + } + for (bigint j=0; jsmalloc(nbytes, "thirdorder:firsttags"); + nbytes = ((bigint) sizeof(bigint *)) * natoms; + neighbortags = (bigint **) memory->smalloc(nbytes, "thirdorder:neighbortags"); + memset(&datarecv[0],0,sum*sizeof(bigint)); + + n = 0; + for (bigint i = 0; i < natoms; i++) { + neighbortags[i] = &datarecv[n]; + n += ijnum[i]; + } + + for (bigint i = 0; i < natoms; i++) { + int m = 0; + memset(&temptags[0],0,natoms*sizeof(int)); + jnum = ijnumproc[i]; + Jlist = firsttags[i]; + temptags[i] = 1; + for (int j = 0; j < jnum; j++) { + temptags[Jlist[j]] = 1; + klist = firsttags[Jlist[j]]; + knum = ijnumproc[Jlist[j]]; + for (kk = 0; kk < knum; kk++) { + temptags[klist[kk]] = 1; + } + } + for (bigint j=0; j < natoms; j++) { + if (temptags[j] == 1) { + neighbortags[i][m] = j; + m += 1; + } + } + } + + free (firsttags); free (ijnumproc); free (temptags); } \ No newline at end of file From 5abddfe68de1e0ef8704e9399df8db9e12235a3c Mon Sep 17 00:00:00 2001 From: Sievers Date: Sun, 19 Jul 2020 19:12:01 -0700 Subject: [PATCH 0007/1471] Fixed nitpicky details, updated output, moved mass out of folded check --- src/USER-PHONON/dynamical_matrix.cpp | 26 +++++++++---------- src/USER-PHONON/third_order.cpp | 38 +++++++++++++++------------- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/src/USER-PHONON/dynamical_matrix.cpp b/src/USER-PHONON/dynamical_matrix.cpp index 4895da1f7f..ff746caec2 100644 --- a/src/USER-PHONON/dynamical_matrix.cpp +++ b/src/USER-PHONON/dynamical_matrix.cpp @@ -24,6 +24,8 @@ #include "pair.h" #include "timer.h" #include "finish.h" +#include "utils.h" +#include "fmt/format.h" #include using namespace LAMMPS_NS; @@ -135,7 +137,7 @@ 
void DynamicalMatrix::command(int narg, char **arg) else if (comm->me == 0 && screen) fprintf(screen,"Illegal Dynamical Matrix command\n"); if (!folded) dynlenb = dynlen; - if (folded) dynlenb = (atom->natoms)*3; + else dynlenb = (atom->natoms)*3; if (atom->map_style == 0) error->all(FLERR,"Dynamical_matrix command requires an atom map, see atom_modify"); @@ -268,11 +270,11 @@ void DynamicalMatrix::calculateMatrix() //initialize dynmat to all zeros dynmat_clear(dynmat); - if (comm->me == 0 && screen) { - fprintf(screen,"Calculating Dynamical Matrix ...\n"); - fprintf(screen," Total # of atoms = " BIGINT_FORMAT "\n", natoms); - fprintf(screen," Atoms in group = " BIGINT_FORMAT "\n", gcount); - fprintf(screen," Total dynamical matrix elements = " BIGINT_FORMAT "\n", (dynlenb*dynlen) ); + if (comm->me == 0) { + utils::logmesg(lmp,fmt::format("Calculating Dynamical Matrix ...\n")); + utils::logmesg(lmp,fmt::format(" Total # of atoms = {}\n", natoms)); + utils::logmesg(lmp,fmt::format(" Atoms in group = {}\n", gcount)); + utils::logmesg(lmp,fmt::format(" Total dynamical matrix elements = {}\n", (dynlenb*dynlen) )); } // emit dynlen rows of dimalpha*dynlen*dimbeta elements @@ -308,12 +310,12 @@ void DynamicalMatrix::calculateMatrix() local_jdx = atom->map(j); if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal && (gm[j-1] >= 0 || folded)){ + if (atom->rmass_flag == 1) + imass = sqrt(m[local_idx] * m[local_jdx]); + else + imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); if (folded){ for (int beta=0; beta<3; beta++){ - if (atom->rmass_flag == 1) - imass = sqrt(m[local_idx] * m[local_jdx]); - else - imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); dynmat[alpha][(j-1)*3+beta] -= -f[local_jdx][beta]; dynmat[alpha][(j-1)*3+beta] /= (2 * del * imass); dynmat[alpha][(j-1)*3+beta] *= conversion; @@ -321,10 +323,6 @@ void DynamicalMatrix::calculateMatrix() } else{ for (int beta=0; beta<3; beta++){ - if (atom->rmass_flag == 1) - imass = sqrt(m[local_idx] * 
m[local_jdx]); - else - imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); dynmat[alpha][gm[j-1]*3+beta] -= -f[local_jdx][beta]; dynmat[alpha][gm[j-1]*3+beta] /= (2 * del * imass); dynmat[alpha][gm[j-1]*3+beta] *= conversion; diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 635d2be512..e1ac7a94f6 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -27,6 +27,8 @@ #include "timer.h" #include "finish.h" #include "math_special.h" +#include "utils.h" +#include "fmt/format.h" #include #include @@ -127,7 +129,7 @@ void ThirdOrder::command(int narg, char **arg) // group and style igroup = group->find(arg[0]); - if (igroup == -1) error->all(FLERR,"Could not find dynamical matrix group ID"); + if (igroup == -1) error->all(FLERR,"Could not find third_order group ID"); groupbit = group->bitmask[igroup]; gcount = group->count(igroup); dynlen = (gcount)*3; @@ -160,7 +162,7 @@ void ThirdOrder::command(int narg, char **arg) del = force->numeric(FLERR, arg[2]); if (!folded) dynlenb = dynlen; - if (folded) dynlenb = (atom->natoms)*3; + else dynlenb = (atom->natoms)*3; if (atom->map_style == 0) error->all(FLERR,"third_order command requires an atom map, see atom_modify"); @@ -216,14 +218,14 @@ void ThirdOrder::options(int narg, char **arg) } iarg += 2; } else if (strcmp(arg[iarg],"fold") == 0) { - if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); + if (iarg+2 > narg) error->all(FLERR, "Illegal third_order command"); if (strcmp(arg[iarg+1],"yes") == 0) { folded = 1; } else if (strcmp(arg[iarg+1],"no") == 0) { folded = 0; } - else error->all(FLERR,"Illegal input for dynamical_matrix fold option"); + else error->all(FLERR,"Illegal input for third_order fold option"); iarg += 2; } else error->all(FLERR,"Illegal third_order command"); } @@ -267,7 +269,7 @@ void ThirdOrder::openfile(const char* filename) } /* ---------------------------------------------------------------------- - create 
dynamical matrix + create third order tensor ------------------------------------------------------------------------- */ void ThirdOrder::calculateMatrix() @@ -290,12 +292,11 @@ void ThirdOrder::calculateMatrix() getNeighbortags(); - if (comm->me == 0 && screen) { - fprintf(screen,"Calculating Third Order ...\n"); - fprintf(screen," Total # of atoms = " BIGINT_FORMAT "\n", natoms); - fprintf(screen," Atoms in group = " BIGINT_FORMAT "\n", gcount); - fprintf(screen," Total third order elements = " - BIGINT_FORMAT "\n", (dynlen*dynlenb*dynlenb) ); + if (comm->me == 0) { + utils::logmesg(lmp, fmt::format("Calculating Third Order ...\n")); + utils::logmesg(lmp, fmt::format(" Total # of atoms = {}\n", natoms)); + utils::logmesg(lmp, fmt::format(" Atoms in group = {}\n", gcount)); + utils::logmesg(lmp, fmt::format(" Total third order elements = {}\n", (dynlen*dynlenb*dynlenb) )); } update->nsteps = 0; @@ -414,7 +415,7 @@ void ThirdOrder::calculateMatrix() } /* ---------------------------------------------------------------------- - write dynamical matrix + write third order tensor ------------------------------------------------------------------------- */ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) @@ -432,8 +433,8 @@ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) square(dynmat[k*3+2]); if (norm > 1.0e-16) fprintf(fp, - BIGINT_FORMAT " %d " BIGINT_FORMAT " %d %d" - " %7.8f %7.8f %7.8f\n", + BIGINT_FORMAT " %d " BIGINT_FORMAT + " %d %d %7.8f %7.8f %7.8f\n", i+1, a + 1, j+1, b + 1, k+1, dynmat[k*3] * conversion, dynmat[k*3+1] * conversion, @@ -446,8 +447,8 @@ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) square(dynmat[k*3+2]); if (norm > 1.0e-16) fprintf(fp, - BIGINT_FORMAT " %d " BIGINT_FORMAT " %d " BIGINT_FORMAT - " %7.8f %7.8f %7.8f\n", + BIGINT_FORMAT " %d " BIGINT_FORMAT " %d " + BIGINT_FORMAT " %7.8f %7.8f %7.8f\n", i+1, a + 1, j+1, b + 1, groupmap[k]+1, dynmat[k*3] 
* conversion, dynmat[k*3+1] * conversion, @@ -671,6 +672,9 @@ void ThirdOrder::create_groupmap() /* ---------------------------------------------------------------------- */ void ThirdOrder::getNeighbortags() { + // Create an extended neighbor list which is indexed by atom tag and yields atom tags + // groupmap[global atom index-1] = global atom indices (-1) of extended neighbors + bigint natoms = atom->natoms; int *ilist,*jlist,*numneigh,**firstneigh; bigint *Jlist,*klist; @@ -738,7 +742,7 @@ void ThirdOrder::getNeighbortags() { } } } - MPI_Allreduce(datarecv,data,sum,MPI_LONG_LONG_INT,MPI_SUM,world); + MPI_Allreduce(datarecv,data,sum,MPI_LMP_BIGINT,MPI_SUM,world); for (bigint i = 0; i < natoms; i++) { ijnum[i] = 0; From e57b391d4024a4d1dedba842a78b6c5fd063cd09 Mon Sep 17 00:00:00 2001 From: Sievers Date: Mon, 20 Jul 2020 11:35:10 -0700 Subject: [PATCH 0008/1471] Add threading capability to both commands --- src/USER-PHONON/dynamical_matrix.cpp | 3 +++ src/USER-PHONON/third_order.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/USER-PHONON/dynamical_matrix.cpp b/src/USER-PHONON/dynamical_matrix.cpp index ff746caec2..0ff67c44a8 100644 --- a/src/USER-PHONON/dynamical_matrix.cpp +++ b/src/USER-PHONON/dynamical_matrix.cpp @@ -76,6 +76,9 @@ void DynamicalMatrix::setup() vflag=0; update_force(); + modify->setup(vflag); + update->setupflag = 0; + //if all then skip communication groupmap population if (gcount == atom->natoms) for (bigint i=0; inatoms; i++) diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index e1ac7a94f6..5b0a995d99 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -87,6 +87,9 @@ void ThirdOrder::setup() vflag=0; update_force(); + modify->setup(vflag); + update->setupflag = 0; + if (gcount == atom->natoms) for (bigint i=0; inatoms; i++) groupmap[i] = i; From 3a9796d9b39f6cec89623fc5c8dbd1aa85212ca8 Mon Sep 17 00:00:00 2001 From: Donatas Surblys Date: Mon, 14 Dec 2020 19:29:18 
+0900 Subject: [PATCH 0009/1471] add flags for centroid stress --- src/fix.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/fix.h b/src/fix.h index 69fff154dc..f4ab28874b 100644 --- a/src/fix.h +++ b/src/fix.h @@ -113,6 +113,7 @@ class Fix : protected Pointers { double virial[6]; // virial for this timestep double *eatom, **vatom; // per-atom energy/virial for this timestep + double **cvatom; // per-atom centroid virial for this timestep int centroidstressflag; // centroid stress compared to two-body stress // CENTROID_SAME = same as two-body stress @@ -249,8 +250,8 @@ class Fix : protected Pointers { int evflag; int eflag_either, eflag_global, eflag_atom; - int vflag_either, vflag_global, vflag_atom; - int maxeatom, maxvatom; + int vflag_either, vflag_global, vflag_atom, cvflag_atom; + int maxeatom, maxvatom, maxcvatom; int copymode; // if set, do not deallocate during destruction // required when classes are used as functors by Kokkos @@ -263,7 +264,7 @@ class Fix : protected Pointers { ev_setup(eflag, vflag); else evflag = eflag_either = eflag_global = eflag_atom = vflag_either = vflag_global = vflag_atom = - 0; + cvflag_atom = 0; } void ev_setup(int, int); void ev_tally(int, int *, double, double, double *); @@ -273,7 +274,7 @@ class Fix : protected Pointers { if (vflag && thermo_virial) v_setup(vflag); else - evflag = vflag_either = vflag_global = vflag_atom = 0; + evflag = vflag_either = vflag_global = vflag_atom = cvflag_atom = 0; } void v_setup(int); void v_tally(int, int *, double, double *); From abba1204a8995809fd6372c3a42f76bc13b6f160 Mon Sep 17 00:00:00 2001 From: Donatas Surblys Date: Mon, 6 Dec 2021 17:12:39 +0900 Subject: [PATCH 0010/1471] support for centroid stress in fixes --- src/fix.cpp | 122 +++++++++++++++++++++++++++++++++++++++++++++++++--- src/fix.h | 1 + 2 files changed, 118 insertions(+), 5 deletions(-) diff --git a/src/fix.cpp b/src/fix.cpp index 996cd9b7d5..029345e6c6 100644 --- a/src/fix.cpp +++ 
b/src/fix.cpp @@ -35,7 +35,8 @@ int Fix::instance_total = 0; Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) : Pointers(lmp), id(nullptr), style(nullptr), extlist(nullptr), vector_atom(nullptr), array_atom(nullptr), - vector_local(nullptr), array_local(nullptr), eatom(nullptr), vatom(nullptr) + vector_local(nullptr), array_local(nullptr), eatom(nullptr), vatom(nullptr), + cvatom(nullptr) { instance_me = instance_total++; @@ -97,8 +98,8 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) : // set vflag_atom = 0 b/c some fixes grow vatom in grow_arrays() // which may occur outside of timestepping - maxeatom = maxvatom = 0; - vflag_atom = 0; + maxeatom = maxvatom = maxcvatom = 0; + vflag_atom = cvflag_atom = 0; centroidstressflag = CENTROID_SAME; // KOKKOS per-fix data masks @@ -122,6 +123,7 @@ Fix::~Fix() delete [] style; memory->destroy(eatom); memory->destroy(vatom); + memory->destroy(cvatom); } /* ---------------------------------------------------------------------- @@ -197,7 +199,13 @@ void Fix::ev_setup(int eflag, int vflag) else { vflag_either = vflag; vflag_global = vflag & (VIRIAL_PAIR | VIRIAL_FDOTR); - vflag_atom = vflag & (VIRIAL_ATOM | VIRIAL_CENTROID); + if (centroidstressflag != CENTROID_AVAIL) { + vflag_atom = vflag & (VIRIAL_ATOM | VIRIAL_CENTROID); + cvflag_atom = 0; + } else { + vflag_atom = vflag & VIRIAL_ATOM; + cvflag_atom = vflag & VIRIAL_CENTROID; + } } // reallocate per-atom arrays if necessary @@ -212,6 +220,11 @@ void Fix::ev_setup(int eflag, int vflag) memory->destroy(vatom); memory->create(vatom,maxvatom,6,"fix:vatom"); } + if (cvflag_atom && atom->nlocal > maxcvatom) { + maxcvatom = atom->nmax; + memory->destroy(cvatom); + memory->create(cvatom,maxcvatom,9,"fix:cvatom"); + } // zero accumulators // no global energy variable to zero (unlike pair,bond,angle,etc) @@ -233,6 +246,20 @@ void Fix::ev_setup(int eflag, int vflag) vatom[i][5] = 0.0; } } + if (cvflag_atom) { + n = atom->nlocal; + for (i = 0; i < n; i++) { + cvatom[i][0] = 0.0; + 
cvatom[i][1] = 0.0; + cvatom[i][2] = 0.0; + cvatom[i][3] = 0.0; + cvatom[i][4] = 0.0; + cvatom[i][5] = 0.0; + cvatom[i][6] = 0.0; + cvatom[i][7] = 0.0; + cvatom[i][8] = 0.0; + } + } } /* ---------------------------------------------------------------------- @@ -248,7 +275,13 @@ void Fix::v_setup(int vflag) evflag = 1; vflag_global = vflag & (VIRIAL_PAIR | VIRIAL_FDOTR); - vflag_atom = vflag & (VIRIAL_ATOM | VIRIAL_CENTROID); + if (centroidstressflag != CENTROID_AVAIL) { + vflag_atom = vflag & (VIRIAL_ATOM | VIRIAL_CENTROID); + cvflag_atom = 0; + } else { + vflag_atom = vflag & VIRIAL_ATOM; + cvflag_atom = vflag & VIRIAL_CENTROID; + } // reallocate per-atom array if necessary @@ -257,6 +290,11 @@ void Fix::v_setup(int vflag) memory->destroy(vatom); memory->create(vatom,maxvatom,6,"fix:vatom"); } + if (cvflag_atom && atom->nlocal > maxcvatom) { + maxcvatom = atom->nmax; + memory->destroy(cvatom); + memory->create(cvatom,maxcvatom,9,"fix:cvatom"); + } // zero accumulators @@ -272,6 +310,20 @@ void Fix::v_setup(int vflag) vatom[i][5] = 0.0; } } + if (cvflag_atom) { + n = atom->nlocal; + for (i = 0; i < n; i++) { + cvatom[i][0] = 0.0; + cvatom[i][1] = 0.0; + cvatom[i][2] = 0.0; + cvatom[i][3] = 0.0; + cvatom[i][4] = 0.0; + cvatom[i][5] = 0.0; + cvatom[i][6] = 0.0; + cvatom[i][7] = 0.0; + cvatom[i][8] = 0.0; + } + } } /* ---------------------------------------------------------------------- @@ -338,6 +390,66 @@ void Fix::v_tally(int n, int *list, double total, double *v) } } +/* ---------------------------------------------------------------------- + tally virial into global and per-atom accumulators + n = # of local owned atoms involved, with local indices in list + vtot = total virial for the interaction involving total atoms + npair = # of atom pairs with forces between them + pairlist = index list of pairs + fpairlist = forces between pairs + dellist = displacement vectors between pairs + increment global virial by n/total fraction + increment per-atom virial of each
atom in list by 1/total fraction + add centroid form atomic virial contribution for each atom if available + this method can be used when fix computes forces in post_force() + e.g. fix shake, fix rigid: compute virial only on owned atoms + whether newton_bond is on or off + other procs will tally left-over fractions for atoms they own +------------------------------------------------------------------------- */ + +void Fix::v_tally(int n, int *list, double total, double *vtot, int nlocal, + int npair, int pairlist[][2], double *fpairlist, double dellist[][3]) +{ + + v_tally(n, list, total, vtot); + + if (cvflag_atom) { + double v[6]; + for (int i = 0; i < npair; i++) { + v[0] = 0.5*dellist[i][0]*dellist[i][0]*fpairlist[i]; + v[1] = 0.5*dellist[i][1]*dellist[i][1]*fpairlist[i]; + v[2] = 0.5*dellist[i][2]*dellist[i][2]*fpairlist[i]; + v[3] = 0.5*dellist[i][0]*dellist[i][1]*fpairlist[i]; + v[4] = 0.5*dellist[i][0]*dellist[i][2]*fpairlist[i]; + v[5] = 0.5*dellist[i][1]*dellist[i][2]*fpairlist[i]; + const int i0 = pairlist[i][0]; + const int i1 = pairlist[i][1]; + if (i0 < nlocal) { + cvatom[i0][0] += v[0]; + cvatom[i0][1] += v[1]; + cvatom[i0][2] += v[2]; + cvatom[i0][3] += v[3]; + cvatom[i0][4] += v[4]; + cvatom[i0][5] += v[5]; + cvatom[i0][6] += v[3]; + cvatom[i0][7] += v[4]; + cvatom[i0][8] += v[5]; + } + if (i1 < nlocal) { + cvatom[i1][0] += v[0]; + cvatom[i1][1] += v[1]; + cvatom[i1][2] += v[2]; + cvatom[i1][3] += v[3]; + cvatom[i1][4] += v[4]; + cvatom[i1][5] += v[5]; + cvatom[i1][6] += v[3]; + cvatom[i1][7] += v[4]; + cvatom[i1][8] += v[5]; + } + } + } +} + /* ---------------------------------------------------------------------- tally virial into global and per-atom accumulators i = local index of atom diff --git a/src/fix.h b/src/fix.h index f4ab28874b..1cce08f33f 100644 --- a/src/fix.h +++ b/src/fix.h @@ -278,6 +278,7 @@ class Fix : protected Pointers { } void v_setup(int); void v_tally(int, int *, double, double *); + void 
v_tally(int,int*,double,double*,int,int,int[][2],double*,double[][3]); void v_tally(int, double *); void v_tally(int, int, double); }; From 8520a7164617b8fd92dcf0e79d4a003e00b9fa9c Mon Sep 17 00:00:00 2001 From: Donatas Surblys Date: Mon, 14 Dec 2020 19:31:38 +0900 Subject: [PATCH 0011/1471] centroid stress support in shake (and rattle) --- src/RIGID/fix_shake.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 29739b294c..0904312a75 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -66,6 +66,7 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : create_attribute = 1; dof_flag = 1; stores_ids = 1; + centroidstressflag = CENTROID_AVAIL; // error check @@ -1764,7 +1765,10 @@ void FixShake::shake(int m) v[4] = lamda*r01[0]*r01[2]; v[5] = lamda*r01[1]*r01[2]; - v_tally(nlist,list,2.0,v); + double fpairlist[] = {lamda}; + double dellist[][3] = {{r01[0], r01[1], r01[2]}}; + int pairlist[][2] = {{i0,i1}}; + v_tally(nlist,list,2.0,v,nlocal,1,pairlist,fpairlist,dellist); } } @@ -1937,7 +1941,11 @@ void FixShake::shake3(int m) v[4] = lamda01*r01[0]*r01[2] + lamda02*r02[0]*r02[2]; v[5] = lamda01*r01[1]*r01[2] + lamda02*r02[1]*r02[2]; - v_tally(nlist,list,3.0,v); + double fpairlist[] = {lamda01, lamda02}; + double dellist[][3] = {{r01[0], r01[1], r01[2]}, + {r02[0], r02[1], r02[2]}}; + int pairlist[][2] = {{i0,i1}, {i0,i2}}; + v_tally(nlist,list,3.0,v,nlocal,2,pairlist,fpairlist,dellist); } } @@ -2189,7 +2197,12 @@ void FixShake::shake4(int m) v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda03*r03[0]*r03[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda03*r03[1]*r03[2]; - v_tally(nlist,list,4.0,v); + double fpairlist[] = {lamda01, lamda02, lamda03}; + double dellist[][3] = {{r01[0], r01[1], r01[2]}, + {r02[0], r02[1], r02[2]}, + {r03[0], r03[1], r03[2]}}; + int pairlist[][2] = {{i0,i1}, {i0,i2}, {i0,i3}}; + 
v_tally(nlist,list,4.0,v,nlocal,3,pairlist,fpairlist,dellist); } } @@ -2432,7 +2445,12 @@ void FixShake::shake3angle(int m) v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda12*r12[0]*r12[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda12*r12[1]*r12[2]; - v_tally(nlist,list,3.0,v); + double fpairlist[] = {lamda01, lamda02, lamda12}; + double dellist[][3] = {{r01[0], r01[1], r01[2]}, + {r02[0], r02[1], r02[2]}, + {r12[0], r12[1], r12[2]}}; + int pairlist[][2] = {{i0,i1}, {i0,i2}, {i1,i2}}; + v_tally(nlist,list,3.0,v,nlocal,3,pairlist,fpairlist,dellist); } } From 3ff8d8bf414d57ec7f1235c338cc8a0ef392abea Mon Sep 17 00:00:00 2001 From: Donatas Surblys Date: Mon, 14 Dec 2020 19:41:34 +0900 Subject: [PATCH 0012/1471] update centroid/stress/atom compute to correctly handle fixes with CENTROID_AVAIL --- src/compute_centroid_stress_atom.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/compute_centroid_stress_atom.cpp b/src/compute_centroid_stress_atom.cpp index a050c8bb6a..66dd11ac43 100644 --- a/src/compute_centroid_stress_atom.cpp +++ b/src/compute_centroid_stress_atom.cpp @@ -268,19 +268,26 @@ void ComputeCentroidStressAtom::compute_peratom() // possible during setup phase if fix has not initialized its vatom yet // e.g. 
fix ave/spatial defined before fix shake, // and fix ave/spatial uses a per-atom stress from this compute as input - // fix styles are CENTROID_SAME or CENTROID_NOTAVAIL + // fix styles are CENTROID_SAME, CENTROID_AVAIL or CENTROID_NOTAVAIL if (fixflag) { Fix **fix = modify->fix; int nfix = modify->nfix; for (int ifix = 0; ifix < nfix; ifix++) if (fix[ifix]->virial_peratom_flag && fix[ifix]->thermo_virial) { - double **vatom = fix[ifix]->vatom; - if (vatom) - for (i = 0; i < nlocal; i++) { - for (j = 0; j < 6; j++) stress[i][j] += vatom[i][j]; - for (j = 6; j < 9; j++) stress[i][j] += vatom[i][j - 3]; - } + if (modify->fix[ifix]->centroidstressflag == CENTROID_AVAIL) { + double **cvatom = modify->fix[ifix]->cvatom; + if (cvatom) + for (i = 0; i < nlocal; i++) + for (j = 0; j < 9; j++) stress[i][j] += cvatom[i][j]; + } else { + double **vatom = modify->fix[ifix]->vatom; + if (vatom) + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 6; j++) stress[i][j] += vatom[i][j]; + for (j = 6; j < 9; j++) stress[i][j] += vatom[i][j - 3]; + } + } } } From 0ed44e0b8117d495379e6fbba07506e3f4a75df4 Mon Sep 17 00:00:00 2001 From: Sievers Date: Fri, 16 Apr 2021 18:05:20 -0700 Subject: [PATCH 0013/1471] Remove leftover merge conflict string --- src/USER-PHONON/dynamical_matrix.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/USER-PHONON/dynamical_matrix.h b/src/USER-PHONON/dynamical_matrix.h index 251d2bc3b8..e51715f291 100644 --- a/src/USER-PHONON/dynamical_matrix.h +++ b/src/USER-PHONON/dynamical_matrix.h @@ -15,7 +15,6 @@ CommandStyle(dynamical_matrix,DynamicalMatrix) namespace LAMMPS_NS { -<<<<<<< HEAD class DynamicalMatrix : public Command { public: DynamicalMatrix(class LAMMPS *); From ac7c5592d76d104b1656216da34f4a9278bd1109 Mon Sep 17 00:00:00 2001 From: Donatas Surblys Date: Mon, 6 Dec 2021 17:45:49 +0900 Subject: [PATCH 0014/1471] add centroid virial tally function in preparation for rigid/small support --- src/fix.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ 
src/fix.h | 1 + 2 files changed, 45 insertions(+) diff --git a/src/fix.cpp b/src/fix.cpp index 029345e6c6..83e0650483 100644 --- a/src/fix.cpp +++ b/src/fix.cpp @@ -390,6 +390,50 @@ void Fix::v_tally(int n, int *list, double total, double *v) } } +/* ---------------------------------------------------------------------- + tally virial into global and per-atom accumulators + n = # of local owned atoms involved, with local indices in list + vtot = total virial for the interaction involving total atoms + rlist = list of positional vectors + flist = list of force vectors + center = centroid coordinate + increment global virial by n/total fraction + increment per-atom virial of each atom in list by 1/total fraction + add centroid form atomic virial contribution for each atom if available + this method can be used when fix computes forces in post_force() + and only total forces on each atom in group are easily available + e.g. fix rigid/small: compute virial only on owned atoms + whether newton_bond is on or off + other procs will tally left-over fractions for atoms they own +------------------------------------------------------------------------- */ + +void Fix::v_tally(int n, int *list, double total, double *vtot, + double rlist[][3], double flist[][3], double center[]) +{ + + v_tally(n, list, total, vtot); + + if (cvflag_atom) { + for (int i = 0; i< n; i++) { + const double ri0[3] = { + rlist[i][0]-center[0], + rlist[i][1]-center[1], + rlist[i][2]-center[2], + }; + cvatom[list[i]][0] += ri0[0]*flist[i][0]; + cvatom[list[i]][1] += ri0[1]*flist[i][1]; + cvatom[list[i]][2] += ri0[2]*flist[i][2]; + cvatom[list[i]][3] += ri0[0]*flist[i][1]; + cvatom[list[i]][4] += ri0[0]*flist[i][2]; + cvatom[list[i]][5] += ri0[1]*flist[i][2]; + cvatom[list[i]][6] += ri0[1]*flist[i][0]; + cvatom[list[i]][7] += ri0[2]*flist[i][0]; + cvatom[list[i]][8] += ri0[2]*flist[i][1]; + } + } + +} + /* ---------------------------------------------------------------------- tally virial into global and 
per-atom accumulators n = # of local owned atoms involved, with local indices in list diff --git a/src/fix.h b/src/fix.h index 1cce08f33f..339da03734 100644 --- a/src/fix.h +++ b/src/fix.h @@ -279,6 +279,7 @@ class Fix : protected Pointers { void v_setup(int); void v_tally(int, int *, double, double *); void v_tally(int,int*,double,double*,int,int,int[][2],double*,double[][3]); + void v_tally(int,int*,double,double*,double[][3],double[][3],double[]); void v_tally(int, double *); void v_tally(int, int, double); }; From b904d256cdf1d66f869f844b8f3154944c7a650c Mon Sep 17 00:00:00 2001 From: Donatas Surblys Date: Tue, 20 Apr 2021 14:06:53 +0900 Subject: [PATCH 0015/1471] implement keeping track of geometric center in rigid/small --- src/RIGID/fix_rigid_small.cpp | 76 +++++++++++++++++++++++++++++++---- src/RIGID/fix_rigid_small.h | 3 ++ 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 5db24a96d5..db283c6242 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -785,7 +785,7 @@ void FixRigidSmall::initial_integrate(int vflag) // forward communicate updated info of all bodies commflag = INITIAL; - comm->forward_comm_fix(this,26); + comm->forward_comm_fix(this,29); // set coords/orient and velocity/rotation of atoms in rigid bodies @@ -879,6 +879,7 @@ void FixRigidSmall::enforce2d() b->xcm[2] = 0.0; b->vcm[2] = 0.0; b->fcm[2] = 0.0; + b->xgc[2] = 0.0; b->torque[0] = 0.0; b->torque[1] = 0.0; b->angmom[0] = 0.0; @@ -1353,6 +1354,16 @@ void FixRigidSmall::set_xv() } } + // update the position of geometric center + for (int ibody = 0; ibody < nlocal_body + nghost_body; ibody++) { + Body *b = &body[ibody]; + MathExtra::matvec(b->ex_space,b->ey_space,b->ez_space, + b->xgc_body,b->xgc); + b->xgc[0] += b->xcm[0]; + b->xgc[1] += b->xcm[1]; + b->xgc[2] += b->xcm[2]; + } + // set orientation, omega, angmom of each extended particle if (extended) { @@ -1905,11 +1916,15 @@ 
void FixRigidSmall::setup_bodies_static() double **x = atom->x; double *xcm; + double *xgc; for (ibody = 0; ibody < nlocal_body+nghost_body; ibody++) { xcm = body[ibody].xcm; + xgc = body[ibody].xgc; xcm[0] = xcm[1] = xcm[2] = 0.0; + xgc[0] = xgc[1] = xgc[2] = 0.0; body[ibody].mass = 0.0; + body[ibody].natoms = 0; } double unwrap[3]; @@ -1924,22 +1939,31 @@ void FixRigidSmall::setup_bodies_static() domain->unmap(x[i],xcmimage[i],unwrap); xcm = b->xcm; + xgc = b->xgc; xcm[0] += unwrap[0] * massone; xcm[1] += unwrap[1] * massone; xcm[2] += unwrap[2] * massone; + xgc[0] += unwrap[0]; + xgc[1] += unwrap[1]; + xgc[2] += unwrap[2]; b->mass += massone; + b->natoms++; } // reverse communicate xcm, mass of all bodies commflag = XCM_MASS; - comm->reverse_comm_fix(this,4); + comm->reverse_comm_fix(this,8); for (ibody = 0; ibody < nlocal_body; ibody++) { xcm = body[ibody].xcm; + xgc = body[ibody].xgc; xcm[0] /= body[ibody].mass; xcm[1] /= body[ibody].mass; xcm[2] /= body[ibody].mass; + xgc[0] /= body[ibody].natoms; + xgc[1] /= body[ibody].natoms; + xgc[2] /= body[ibody].natoms; } // set vcm, angmom = 0.0 in case inpfile is used @@ -2124,12 +2148,22 @@ void FixRigidSmall::setup_bodies_static() // create initial quaternion MathExtra::exyz_to_q(ex,ey,ez,body[ibody].quat); + + // convert geometric center position to principal axis coordinates + // xcm is wrapped, but xgc is not initially + xcm = body[ibody].xcm; + xgc = body[ibody].xgc; + double delta[3]; + MathExtra::sub3(xgc,xcm,delta); + domain->minimum_image(delta); + MathExtra::transpose_matvec(ex,ey,ez,delta,body[ibody].xgc_body); + MathExtra::add3(xcm,delta,xgc); } // forward communicate updated info of all bodies commflag = INITIAL; - comm->forward_comm_fix(this,26); + comm->forward_comm_fix(this,29); // displace = initial atom coords in basis of principal axes // set displace = 0.0 for atoms not in any rigid body @@ -2807,6 +2841,10 @@ void FixRigidSmall::set_molecule(int nlocalprev, tagint tagprev, int imol, if 
(nlocal_body == nmax_body) grow_body(); Body *b = &body[nlocal_body]; b->mass = onemols[imol]->masstotal; + b->natoms = onemols[imol]->natoms; + b->xgc[0] = xgeom[0]; + b->xgc[1] = xgeom[1]; + b->xgc[2] = xgeom[2]; // new COM = Q (onemols[imol]->xcm - onemols[imol]->center) + xgeom // Q = rotation matrix associated with quat @@ -2829,6 +2867,12 @@ void FixRigidSmall::set_molecule(int nlocalprev, tagint tagprev, int imol, MathExtra::quatquat(quat,onemols[imol]->quat,b->quat); MathExtra::q_to_exyz(b->quat,b->ex_space,b->ey_space,b->ez_space); + MathExtra::transpose_matvec(b->ex_space,b->ey_space,b->ez_space, + ctr2com_rotate,b->xgc_body); + b->xgc_body[0] *= -1; + b->xgc_body[1] *= -1; + b->xgc_body[2] *= -1; + b->angmom[0] = b->angmom[1] = b->angmom[2] = 0.0; b->omega[0] = b->omega[1] = b->omega[2] = 0.0; b->conjqm[0] = b->conjqm[1] = b->conjqm[2] = b->conjqm[3] = 0.0; @@ -2961,7 +3005,7 @@ int FixRigidSmall::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/, int * /*pbc*/) { int i,j; - double *xcm,*vcm,*quat,*omega,*ex_space,*ey_space,*ez_space,*conjqm; + double *xcm,*xgc,*vcm,*quat,*omega,*ex_space,*ey_space,*ez_space,*conjqm; int m = 0; @@ -2973,6 +3017,10 @@ int FixRigidSmall::pack_forward_comm(int n, int *list, double *buf, buf[m++] = xcm[0]; buf[m++] = xcm[1]; buf[m++] = xcm[2]; + xgc = body[bodyown[j]].xgc; + buf[m++] = xgc[0]; + buf[m++] = xgc[1]; + buf[m++] = xgc[2]; vcm = body[bodyown[j]].vcm; buf[m++] = vcm[0]; buf[m++] = vcm[1]; @@ -3048,7 +3096,7 @@ int FixRigidSmall::pack_forward_comm(int n, int *list, double *buf, void FixRigidSmall::unpack_forward_comm(int n, int first, double *buf) { int i,j,last; - double *xcm,*vcm,*quat,*omega,*ex_space,*ey_space,*ez_space,*conjqm; + double *xcm,*xgc,*vcm,*quat,*omega,*ex_space,*ey_space,*ez_space,*conjqm; int m = 0; last = first + n; @@ -3060,6 +3108,10 @@ void FixRigidSmall::unpack_forward_comm(int n, int first, double *buf) xcm[0] = buf[m++]; xcm[1] = buf[m++]; xcm[2] = buf[m++]; + xgc = 
body[bodyown[i]].xgc; + xgc[0] = buf[m++]; + xgc[1] = buf[m++]; + xgc[2] = buf[m++]; vcm = body[bodyown[i]].vcm; vcm[0] = buf[m++]; vcm[1] = buf[m++]; @@ -3135,7 +3187,7 @@ void FixRigidSmall::unpack_forward_comm(int n, int first, double *buf) int FixRigidSmall::pack_reverse_comm(int n, int first, double *buf) { int i,j,m,last; - double *fcm,*torque,*vcm,*angmom,*xcm; + double *fcm,*torque,*vcm,*angmom,*xcm, *xgc; m = 0; last = first + n; @@ -3170,10 +3222,15 @@ int FixRigidSmall::pack_reverse_comm(int n, int first, double *buf) for (i = first; i < last; i++) { if (bodyown[i] < 0) continue; xcm = body[bodyown[i]].xcm; + xgc = body[bodyown[i]].xgc; buf[m++] = xcm[0]; buf[m++] = xcm[1]; buf[m++] = xcm[2]; + buf[m++] = xgc[0]; + buf[m++] = xgc[1]; + buf[m++] = xgc[2]; buf[m++] = body[bodyown[i]].mass; + buf[m++] = static_cast(body[bodyown[i]].natoms); } } else if (commflag == ITENSOR) { @@ -3208,7 +3265,7 @@ int FixRigidSmall::pack_reverse_comm(int n, int first, double *buf) void FixRigidSmall::unpack_reverse_comm(int n, int *list, double *buf) { int i,j,k; - double *fcm,*torque,*vcm,*angmom,*xcm; + double *fcm,*torque,*vcm,*angmom,*xcm, *xgc; int m = 0; @@ -3245,10 +3302,15 @@ void FixRigidSmall::unpack_reverse_comm(int n, int *list, double *buf) j = list[i]; if (bodyown[j] < 0) continue; xcm = body[bodyown[j]].xcm; + xgc = body[bodyown[j]].xgc; xcm[0] += buf[m++]; xcm[1] += buf[m++]; xcm[2] += buf[m++]; + xgc[0] += buf[m++]; + xgc[1] += buf[m++]; + xgc[2] += buf[m++]; body[bodyown[j]].mass += buf[m++]; + body[bodyown[j]].natoms += static_cast(buf[m++]); } } else if (commflag == ITENSOR) { diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index 60a4dd1161..e289c179d9 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -85,7 +85,9 @@ class FixRigidSmall : public Fix { struct Body { double mass; // total mass of body + int natoms; // total number of atoms in body double xcm[3]; // COM position + double xgc[3]; // geometric 
center position double vcm[3]; // COM velocity double fcm[3]; // force on COM double torque[3]; // torque around COM @@ -94,6 +96,7 @@ class FixRigidSmall : public Fix { double ex_space[3]; // principal axes in space coords double ey_space[3]; double ez_space[3]; + double xgc_body[3]; // geometric center relative to xcm in body coords double angmom[3]; // space-frame angular momentum of body double omega[3]; // space-frame omega of body double conjqm[4]; // conjugate quaternion momentum From 0fc73c9d676728ff7226877cd2ee4f6621d731e2 Mon Sep 17 00:00:00 2001 From: Donatas Surblys Date: Mon, 19 Apr 2021 16:18:06 +0900 Subject: [PATCH 0016/1471] support for centroid virial stress in rigid/small --- src/RIGID/fix_rigid_small.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index db283c6242..14742155db 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -73,6 +73,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : dof_flag = 1; enforce2d_flag = 1; stores_ids = 1; + centroidstressflag = CENTROID_AVAIL; MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); @@ -1350,7 +1351,9 @@ void FixRigidSmall::set_xv() vr[4] = 0.5*x0*fc2; vr[5] = 0.5*x1*fc2; - v_tally(1,&i,1.0,vr); + double rlist[][3] = {x0, x1, x2}; + double flist[][3] = {0.5*fc0, 0.5*fc1, 0.5*fc2}; + v_tally(1,&i,1.0,vr,rlist,flist,b->xgc); } } @@ -1510,7 +1513,9 @@ void FixRigidSmall::set_v() vr[4] = 0.5*x0*fc2; vr[5] = 0.5*x1*fc2; - v_tally(1,&i,1.0,vr); + double rlist[][3] = {x0, x1, x2}; + double flist[][3] = {0.5*fc0, 0.5*fc1, 0.5*fc2}; + v_tally(1,&i,1.0,vr,rlist,flist,b->xgc); } } From 1ee8de42d9bc56fea68d4a9758b9dc5e68d1c1b0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 21 Apr 2021 12:08:37 -0400 Subject: [PATCH 0017/1471] minor cleanups and simplifications using fmtlib --- src/USER-PHONON/dynamical_matrix.cpp | 27 +++++++------- 
src/USER-PHONON/dynamical_matrix.h | 1 - src/USER-PHONON/third_order.cpp | 53 ++++++++++++---------------- src/USER-PHONON/third_order.h | 1 - 4 files changed, 34 insertions(+), 48 deletions(-) diff --git a/src/USER-PHONON/dynamical_matrix.cpp b/src/USER-PHONON/dynamical_matrix.cpp index e86b511e1a..9c6c6fe029 100644 --- a/src/USER-PHONON/dynamical_matrix.cpp +++ b/src/USER-PHONON/dynamical_matrix.cpp @@ -21,9 +21,8 @@ #include "neighbor.h" #include "pair.h" #include "timer.h" -#include "utils.h" -#include "fmt/format.h" #include "update.h" + #include #include @@ -41,7 +40,7 @@ DynamicalMatrix::DynamicalMatrix(LAMMPS *lmp) : Command(lmp), fp(nullptr) DynamicalMatrix::~DynamicalMatrix() { - if (fp && me == 0) fclose(fp); + if (fp && comm->me == 0) fclose(fp); memory->destroy(groupmap); fp = nullptr; } @@ -90,8 +89,6 @@ void DynamicalMatrix::setup() void DynamicalMatrix::command(int narg, char **arg) { - MPI_Comm_rank(world,&me); - if (domain->box_exist == 0) error->all(FLERR,"Dynamical_matrix command before simulation box is defined"); if (narg < 2) error->all(FLERR,"Illegal dynamical_matrix command"); @@ -120,7 +117,7 @@ void DynamicalMatrix::command(int narg, char **arg) int style = -1; if (strcmp(arg[1],"regular") == 0) style = REGULAR; else if (strcmp(arg[1],"eskm") == 0) style = ESKM; - else error->all(FLERR,"Illegal Dynamical Matrix command"); + else error->all(FLERR,"Illegal dynamical_matrix command"); del = utils::numeric(FLERR, arg[2],false,lmp); // set option defaults @@ -136,13 +133,13 @@ void DynamicalMatrix::command(int narg, char **arg) // read options from end of input line if (style == REGULAR) options(narg-3,&arg[3]); //COME BACK else if (style == ESKM) options(narg-3,&arg[3]); //COME BACK - else if (comm->me == 0 && screen) fprintf(screen,"Illegal Dynamical Matrix command\n"); + else error->all(FLERR,"Illegal dynamical_matrix command"); if (!folded) dynlenb = dynlen; else dynlenb = (atom->natoms)*3; if (atom->map_style == Atom::MAP_NONE) - 
error->all(FLERR,"Dynamical_matrix command requires an atom map, see atom_modify"); + error->all(FLERR,"Dynamical_matrix command requires an atom map"); // move atoms by 3-vector or specified variable(s) @@ -219,7 +216,6 @@ void DynamicalMatrix::options(int narg, char **arg) void DynamicalMatrix::openfile(const char* filename) { // if file already opened, return - //if (me!=0) return; if (file_opened) return; if (compressed) { @@ -276,7 +272,7 @@ void DynamicalMatrix::calculateMatrix() utils::logmesg(lmp,fmt::format("Calculating Dynamical Matrix ...\n")); utils::logmesg(lmp,fmt::format(" Total # of atoms = {}\n", natoms)); utils::logmesg(lmp,fmt::format(" Atoms in group = {}\n", gcount)); - utils::logmesg(lmp,fmt::format(" Total dynamical matrix elements = {}\n", (dynlenb*dynlen) )); + utils::logmesg(lmp,fmt::format(" Total dynamical matrix elements = {}\n", (dynlenb*dynlen))); } // emit dynlen rows of dimalpha*dynlen*dimbeta elements @@ -336,7 +332,7 @@ void DynamicalMatrix::calculateMatrix() } for (int k=0; k<3; k++) MPI_Reduce(dynmat[k],fdynmat[k],dynlenb,MPI_DOUBLE,MPI_SUM,0,world); - if (me == 0) + if (comm->me == 0) writeMatrix(fdynmat); dynmat_clear(dynmat); if (comm->me == 0 && screen) { @@ -358,7 +354,7 @@ void DynamicalMatrix::calculateMatrix() delete [] fdynmat[i]; delete [] fdynmat; - if (screen && me ==0 ) fprintf(screen,"Finished Calculating Dynamical Matrix\n"); + if (screen && comm->me ==0) fprintf(screen,"Finished Calculating Dynamical Matrix\n"); } /* ---------------------------------------------------------------------- @@ -367,7 +363,7 @@ void DynamicalMatrix::calculateMatrix() void DynamicalMatrix::writeMatrix(double **dynmat) { - if (me != 0 || !fp) + if (comm->me != 0 || !fp) return; clearerr(fp); @@ -379,8 +375,9 @@ void DynamicalMatrix::writeMatrix(double **dynmat) } else { for (int i = 0; i < 3; i++) { for (bigint j = 0; j < dynlenb; j++) { - if ((j+1)%3==0) fprintf(fp, "%4.8f\n", dynmat[i][j]); - else fprintf(fp, "%4.8f ",dynmat[i][j]); + 
fmt::print(fp, "{:.8f}", dynmat[i][j]); + if ((j+1)%3==0) fputs("\n",fp); + else fputs(" ",fp); } } if (ferror(fp)) diff --git a/src/USER-PHONON/dynamical_matrix.h b/src/USER-PHONON/dynamical_matrix.h index e51715f291..1c5333f31d 100644 --- a/src/USER-PHONON/dynamical_matrix.h +++ b/src/USER-PHONON/dynamical_matrix.h @@ -58,7 +58,6 @@ class DynamicalMatrix : public Command { bigint dynlen; // rank of dynamical matrix bigint dynlenb; // new dynlen if folded int scaleflag; - int me; bigint *groupmap; int compressed; // 1 if dump file is written compressed, 0 no diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 53a0d280cc..6a4ef24751 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -18,17 +18,14 @@ #include "kspace.h" #include "math_special.h" #include "memory.h" -#include "neighbor.h" +#include "modify.h" #include "neigh_list.h" #include "neigh_request.h" -#include "modify.h" +#include "neighbor.h" #include "pair.h" #include "timer.h" -#include "finish.h" -#include "math_special.h" -#include "utils.h" -#include "fmt/format.h" #include "update.h" + #include #include @@ -47,7 +44,7 @@ ThirdOrder::ThirdOrder(LAMMPS *lmp) : Command(lmp), fp(nullptr) ThirdOrder::~ThirdOrder() { - if (fp && me == 0) fclose(fp); + if (fp && comm->me == 0) fclose(fp); fp = nullptr; memory->destroy(groupmap); // memory->destroy(ijnum); @@ -101,8 +98,6 @@ void ThirdOrder::setup() void ThirdOrder::command(int narg, char **arg) { - MPI_Comm_rank(world,&me); - if (domain->box_exist == 0) error->all(FLERR,"third_order command before simulation box is defined"); if (narg < 2) error->all(FLERR,"Illegal third_order command"); @@ -161,14 +156,14 @@ void ThirdOrder::command(int narg, char **arg) // read options from end of input line if (style == REGULAR) options(narg-3,&arg[3]); //COME BACK else if (style == ESKM) options(narg-3,&arg[3]); //COME BACK - else if (comm->me == 0 && screen) fprintf(screen,"Illegal Dynamical Matrix 
command\n"); + else error->all(FLERR,"Illegal third_order command"); del = utils::numeric(FLERR, arg[2],false,lmp); if (!folded) dynlenb = dynlen; else dynlenb = (atom->natoms)*3; if (atom->map_style == Atom::MAP_NONE) - error->all(FLERR,"third_order command requires an atom map, see atom_modify"); + error->all(FLERR,"third_order command requires an atom map"); // move atoms by 3-vector or specified variable(s) @@ -230,7 +225,7 @@ void ThirdOrder::options(int narg, char **arg) iarg += 2; } else error->all(FLERR,"Illegal third_order command"); } - if (file_flag == 1 and me == 0) { + if (file_flag == 1 && comm->me == 0) { openfile(filename); } } @@ -297,7 +292,7 @@ void ThirdOrder::calculateMatrix() utils::logmesg(lmp, fmt::format("Calculating Third Order ...\n")); utils::logmesg(lmp, fmt::format(" Total # of atoms = {}\n", natoms)); utils::logmesg(lmp, fmt::format(" Atoms in group = {}\n", gcount)); - utils::logmesg(lmp, fmt::format(" Total third order elements = {}\n", (dynlen*dynlenb*dynlenb) )); + utils::logmesg(lmp, fmt::format(" Total third order elements = {}\n", (dynlen*dynlenb*dynlenb))); } update->nsteps = 0; @@ -387,7 +382,7 @@ void ThirdOrder::calculateMatrix() displace_atom(local_jdx, beta, 1); displace_atom(local_idx, alpha, 1); MPI_Reduce(dynmat,fdynmat,dynlenb,MPI_DOUBLE,MPI_SUM,0,world); - if (me == 0){ + if (comm->me == 0){ if (folded) { writeMatrix(fdynmat, gm[i-1], alpha, j, beta); } else { @@ -411,7 +406,7 @@ void ThirdOrder::calculateMatrix() delete [] dynmat; delete [] fdynmat; - if (screen && me ==0 ) + if (screen && comm->me ==0) fprintf(screen,"Finished Calculating Third Order Tensor\n"); } @@ -421,7 +416,7 @@ void ThirdOrder::calculateMatrix() void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) { - if (me != 0) + if (comm->me != 0) return; double norm; @@ -433,13 +428,11 @@ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) square(dynmat[k*3+1])+ square(dynmat[k*3+2]); if (norm > 
1.0e-16) - fprintf(fp, - BIGINT_FORMAT " %d " BIGINT_FORMAT - " %d %d %7.8f %7.8f %7.8f\n", - i+1, a + 1, j+1, b + 1, k+1, - dynmat[k*3] * conversion, - dynmat[k*3+1] * conversion, - dynmat[k*3+2] * conversion); + fmt::print(fp,"{} {} {} {} {} {:.8f} {.8f} {.8f}\n", + i+1, a + 1, j+1, b + 1, k+1, + dynmat[k*3] * conversion, + dynmat[k*3+1] * conversion, + dynmat[k*3+2] * conversion); } } else { for (int k = 0; k < gcount; k++){ @@ -447,13 +440,11 @@ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) square(dynmat[k*3+1])+ square(dynmat[k*3+2]); if (norm > 1.0e-16) - fprintf(fp, - BIGINT_FORMAT " %d " BIGINT_FORMAT " %d " - BIGINT_FORMAT " %7.8f %7.8f %7.8f\n", - i+1, a + 1, j+1, b + 1, groupmap[k]+1, - dynmat[k*3] * conversion, - dynmat[k*3+1] * conversion, - dynmat[k*3+2] * conversion); + fmt::print(fp, "{} {} {} {} {} {:.8f} {.8f} {.8f}\n", + i+1, a + 1, j+1, b + 1, groupmap[k]+1, + dynmat[k*3] * conversion, + dynmat[k*3+1] * conversion, + dynmat[k*3+2] * conversion); } } } else if (binaryflag && fp) { @@ -809,4 +800,4 @@ void ThirdOrder::getNeighbortags() { free (firsttags); free (ijnumproc); free (temptags); -} \ No newline at end of file +} diff --git a/src/USER-PHONON/third_order.h b/src/USER-PHONON/third_order.h index a833cc4754..201eff9383 100644 --- a/src/USER-PHONON/third_order.h +++ b/src/USER-PHONON/third_order.h @@ -59,7 +59,6 @@ namespace LAMMPS_NS { bigint dynlen; bigint dynlenb; int scaleflag; - int me; bigint gcount; // number of atoms in group bigint *groupmap; From 598e82d2366c176ed9476930b030c1a31594b734 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 21 Apr 2021 12:16:43 -0400 Subject: [PATCH 0018/1471] small cosmetic changes --- src/USER-PHONON/dynamical_matrix.cpp | 26 ++++++++++---------------- src/USER-PHONON/third_order.cpp | 18 ++++++++---------- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/src/USER-PHONON/dynamical_matrix.cpp b/src/USER-PHONON/dynamical_matrix.cpp index 
9c6c6fe029..7dbd0d0183 100644 --- a/src/USER-PHONON/dynamical_matrix.cpp +++ b/src/USER-PHONON/dynamical_matrix.cpp @@ -79,7 +79,7 @@ void DynamicalMatrix::setup() //if all then skip communication groupmap population if (gcount == atom->natoms) - for (bigint i=0; inatoms; i++) + for (bigint i=0; i < atom->natoms; i++) groupmap[i] = i; else create_groupmap(); @@ -179,13 +179,11 @@ void DynamicalMatrix::options(int narg, char **arg) if (iarg + 2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); if (strcmp(arg[iarg+1],"gzip") == 0) { compressed = 1; - } - else if (strcmp(arg[iarg+1],"yes") == 0) { + } else if (strcmp(arg[iarg+1],"yes") == 0) { binaryflag = 1; } iarg += 2; - } - else if (strcmp(arg[iarg],"file") == 0) { + } else if (strcmp(arg[iarg],"file") == 0) { if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); filename = arg[iarg + 1]; file_flag = 1; @@ -194,11 +192,9 @@ void DynamicalMatrix::options(int narg, char **arg) if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); if (strcmp(arg[iarg+1],"yes") == 0) { folded = 1; - } - else if (strcmp(arg[iarg+1],"no") == 0) { + } else if (strcmp(arg[iarg+1],"no") == 0) { folded = 0; - } - else error->all(FLERR,"Illegal input for dynamical_matrix fold option"); + } else error->all(FLERR,"Illegal input for dynamical_matrix fold option"); iarg += 2; } else error->all(FLERR,"Illegal dynamical_matrix command"); } @@ -294,8 +290,7 @@ void DynamicalMatrix::calculateMatrix() for (int beta=0; beta<3; beta++){ dynmat[alpha][(j-1)*3+beta] -= f[local_jdx][beta]; } - } - else { + } else { for (int beta=0; beta<3; beta++){ dynmat[alpha][gm[j-1]*3+beta] -= f[local_jdx][beta]; } @@ -318,8 +313,7 @@ void DynamicalMatrix::calculateMatrix() dynmat[alpha][(j-1)*3+beta] /= (2 * del * imass); dynmat[alpha][(j-1)*3+beta] *= conversion; } - } - else{ + } else { for (int beta=0; beta<3; beta++){ dynmat[alpha][gm[j-1]*3+beta] -= -f[local_jdx][beta]; dynmat[alpha][gm[j-1]*3+beta] /= (2 * 
del * imass); @@ -573,12 +567,12 @@ void DynamicalMatrix::create_groupmap() } //populate arrays for Allgatherv - for (int i=0; inprocs; i++) { + for (int i=0; i < comm->nprocs; i++) { recv[i] = 0; } recv[comm->me] = gid; MPI_Allreduce(recv,displs,comm->nprocs,MPI_INT,MPI_SUM,world); - for (int i=0; inprocs; i++) { + for (int i=0; i < comm->nprocs; i++) { recv[i]=displs[i]; if (i>0) displs[i] = displs[i-1]+recv[i-1]; else displs[i] = 0; @@ -590,7 +584,7 @@ void DynamicalMatrix::create_groupmap() //populate member groupmap based on temp groupmap bigint j = 0; - for (bigint i=1; i<=natoms; i++) { + for (bigint i=1; i <= natoms; i++) { // flag groupmap contents that are in temp_groupmap if (j < gcount && i == temp_groupmap[j]) groupmap[i-1] = j++; diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 6a4ef24751..010924982d 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -26,8 +26,8 @@ #include "timer.h" #include "update.h" -#include #include +#include using namespace LAMMPS_NS; using namespace MathSpecial; @@ -217,11 +217,9 @@ void ThirdOrder::options(int narg, char **arg) if (iarg+2 > narg) error->all(FLERR, "Illegal third_order command"); if (strcmp(arg[iarg+1],"yes") == 0) { folded = 1; - } - else if (strcmp(arg[iarg+1],"no") == 0) { + } else if (strcmp(arg[iarg+1],"no") == 0) { folded = 0; - } - else error->all(FLERR,"Illegal input for third_order fold option"); + } else error->all(FLERR,"Illegal input for third_order fold option"); iarg += 2; } else error->all(FLERR,"Illegal third_order command"); } @@ -608,7 +606,7 @@ void ThirdOrder::create_groupmap() bigint *temp_groupmap = new bigint[natoms]; //find number of local atoms in the group (final_gid) - for (bigint i=1; i<=natoms; i++) { + for (bigint i=1; i <= natoms; i++) { local_idx = atom->map(i); if ((local_idx >= 0) && (local_idx < nlocal) && mask[local_idx] & groupbit) gid += 1; // gid at the end of loop is final_Gid @@ -618,7 +616,7 @@ void 
ThirdOrder::create_groupmap() gid = 0; //create a map between global atom id and group atom id for each proc - for (bigint i=1; i<=natoms; i++) { + for (bigint i=1; i <= natoms; i++) { local_idx = atom->map(i); if ((local_idx >= 0) && (local_idx < nlocal) && (mask[local_idx] & groupbit)) { @@ -628,12 +626,12 @@ void ThirdOrder::create_groupmap() } //populate arrays for Allgatherv - for (int i=0; inprocs; i++) { + for (int i=0; i < comm->nprocs; i++) { recv[i] = 0; } recv[comm->me] = gid; MPI_Allreduce(recv,displs,comm->nprocs,MPI_INT,MPI_SUM,world); - for (int i=0; inprocs; i++) { + for (int i=0; i < comm->nprocs; i++) { recv[i]=displs[i]; if (i>0) displs[i] = displs[i-1]+recv[i-1]; else displs[i] = 0; @@ -646,7 +644,7 @@ void ThirdOrder::create_groupmap() //populate member groupmap based on temp groupmap bigint j = 0; - for (bigint i=1; i<=natoms; i++) { + for (bigint i=1; i <= natoms; i++) { // flag groupmap contents that are in temp_groupmap if (j < gcount && i == temp_groupmap[j]) groupmap[i-1] = j++; From b982542ae66b0a6d8a5712d28c1fa4b7a77ccc91 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 21 Apr 2021 14:56:26 -0400 Subject: [PATCH 0019/1471] update indentation to 2 blanks. avoid "hanging else" constructs. 
--- src/USER-PHONON/dynamical_matrix.cpp | 766 +++++++++++++-------------- src/USER-PHONON/third_order.cpp | 8 +- 2 files changed, 387 insertions(+), 387 deletions(-) diff --git a/src/USER-PHONON/dynamical_matrix.cpp b/src/USER-PHONON/dynamical_matrix.cpp index 7dbd0d0183..b43227fd7c 100644 --- a/src/USER-PHONON/dynamical_matrix.cpp +++ b/src/USER-PHONON/dynamical_matrix.cpp @@ -33,16 +33,16 @@ enum{REGULAR,ESKM}; DynamicalMatrix::DynamicalMatrix(LAMMPS *lmp) : Command(lmp), fp(nullptr) { - external_force_clear = 1; + external_force_clear = 1; } /* ---------------------------------------------------------------------- */ DynamicalMatrix::~DynamicalMatrix() { - if (fp && comm->me == 0) fclose(fp); - memory->destroy(groupmap); - fp = nullptr; + if (fp && comm->me == 0) fclose(fp); + memory->destroy(groupmap); + fp = nullptr; } /* ---------------------------------------------------------------------- @@ -53,116 +53,116 @@ DynamicalMatrix::~DynamicalMatrix() void DynamicalMatrix::setup() { - // setup domain, communication and neighboring - // acquire ghosts - // build neighbor lists - if (triclinic) domain->x2lamda(atom->nlocal); - domain->pbc(); - domain->reset_box(); - comm->setup(); - if (neighbor->style) neighbor->setup_bins(); - comm->exchange(); - comm->borders(); - if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - domain->image_check(); - domain->box_too_small_check(); - neighbor->build(1); + // setup domain, communication and neighboring + // acquire ghosts + // build neighbor lists + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + comm->exchange(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + domain->image_check(); + domain->box_too_small_check(); + neighbor->build(1); - // compute all forces - external_force_clear = 0; - eflag=0; - vflag=0; - update_force(); + // compute all forces + external_force_clear = 0; 
+ eflag=0; + vflag=0; + update_force(); - modify->setup(vflag); - update->setupflag = 0; + modify->setup(vflag); + update->setupflag = 0; - //if all then skip communication groupmap population - if (gcount == atom->natoms) - for (bigint i=0; i < atom->natoms; i++) - groupmap[i] = i; - else - create_groupmap(); + //if all then skip communication groupmap population + if (gcount == atom->natoms) + for (bigint i=0; i < atom->natoms; i++) + groupmap[i] = i; + else + create_groupmap(); } /* ---------------------------------------------------------------------- */ void DynamicalMatrix::command(int narg, char **arg) { - if (domain->box_exist == 0) - error->all(FLERR,"Dynamical_matrix command before simulation box is defined"); - if (narg < 2) error->all(FLERR,"Illegal dynamical_matrix command"); + if (domain->box_exist == 0) + error->all(FLERR,"Dynamical_matrix command before simulation box is defined"); + if (narg < 2) error->all(FLERR,"Illegal dynamical_matrix command"); - lmp->init(); + lmp->init(); - // orthogonal vs triclinic simulation box + // orthogonal vs triclinic simulation box - triclinic = domain->triclinic; + triclinic = domain->triclinic; - if (force->pair && force->pair->compute_flag) pair_compute_flag = 1; - else pair_compute_flag = 0; - if (force->kspace && force->kspace->compute_flag) kspace_compute_flag = 1; - else kspace_compute_flag = 0; + if (force->pair && force->pair->compute_flag) pair_compute_flag = 1; + else pair_compute_flag = 0; + if (force->kspace && force->kspace->compute_flag) kspace_compute_flag = 1; + else kspace_compute_flag = 0; - // group and style + // group and style - igroup = group->find(arg[0]); - if (igroup == -1) error->all(FLERR,"Could not find dynamical matrix group ID"); - groupbit = group->bitmask[igroup]; - gcount = group->count(igroup); - dynlen = (gcount)*3; - memory->create(groupmap,atom->natoms,"total_group_map:totalgm"); - update->setupflag = 1; + igroup = group->find(arg[0]); + if (igroup == -1) 
error->all(FLERR,"Could not find dynamical matrix group ID"); + groupbit = group->bitmask[igroup]; + gcount = group->count(igroup); + dynlen = (gcount)*3; + memory->create(groupmap,atom->natoms,"total_group_map:totalgm"); + update->setupflag = 1; - int style = -1; - if (strcmp(arg[1],"regular") == 0) style = REGULAR; - else if (strcmp(arg[1],"eskm") == 0) style = ESKM; - else error->all(FLERR,"Illegal dynamical_matrix command"); - del = utils::numeric(FLERR, arg[2],false,lmp); + int style = -1; + if (strcmp(arg[1],"regular") == 0) style = REGULAR; + else if (strcmp(arg[1],"eskm") == 0) style = ESKM; + else error->all(FLERR,"Illegal dynamical_matrix command"); + del = utils::numeric(FLERR, arg[2],false,lmp); - // set option defaults + // set option defaults - binaryflag = 0; - scaleflag = 0; - compressed = 0; - file_flag = 0; - file_opened = 0; - folded = 0; - conversion = 1; + binaryflag = 0; + scaleflag = 0; + compressed = 0; + file_flag = 0; + file_opened = 0; + folded = 0; + conversion = 1; - // read options from end of input line - if (style == REGULAR) options(narg-3,&arg[3]); //COME BACK - else if (style == ESKM) options(narg-3,&arg[3]); //COME BACK - else error->all(FLERR,"Illegal dynamical_matrix command"); + // read options from end of input line + if (style == REGULAR) options(narg-3,&arg[3]); //COME BACK + else if (style == ESKM) options(narg-3,&arg[3]); //COME BACK + else error->all(FLERR,"Illegal dynamical_matrix command"); - if (!folded) dynlenb = dynlen; - else dynlenb = (atom->natoms)*3; + if (!folded) dynlenb = dynlen; + else dynlenb = (atom->natoms)*3; - if (atom->map_style == Atom::MAP_NONE) - error->all(FLERR,"Dynamical_matrix command requires an atom map"); + if (atom->map_style == Atom::MAP_NONE) + error->all(FLERR,"Dynamical_matrix command requires an atom map"); - // move atoms by 3-vector or specified variable(s) + // move atoms by 3-vector or specified variable(s) - if (style == REGULAR) { - setup(); - timer->init(); - 
timer->barrier_start(); - calculateMatrix(); - timer->barrier_stop(); - } + if (style == REGULAR) { + setup(); + timer->init(); + timer->barrier_start(); + calculateMatrix(); + timer->barrier_stop(); + } - if (style == ESKM) { - setup(); - convert_units(update->unit_style); - conversion = conv_energy/conv_distance/conv_mass; - timer->init(); - timer->barrier_start(); - calculateMatrix(); - timer->barrier_stop(); - } + if (style == ESKM) { + setup(); + convert_units(update->unit_style); + conversion = conv_energy/conv_distance/conv_mass; + timer->init(); + timer->barrier_start(); + calculateMatrix(); + timer->barrier_stop(); + } - Finish finish(lmp); - finish.end(1); + Finish finish(lmp); + finish.end(1); } /* ---------------------------------------------------------------------- @@ -171,36 +171,36 @@ void DynamicalMatrix::command(int narg, char **arg) void DynamicalMatrix::options(int narg, char **arg) { - if (narg < 0) error->all(FLERR,"Illegal dynamical_matrix command"); - int iarg = 0; - const char* filename = "dynmat.dyn"; - while (iarg < narg) { - if (strcmp(arg[iarg],"binary") == 0) { - if (iarg + 2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); - if (strcmp(arg[iarg+1],"gzip") == 0) { - compressed = 1; - } else if (strcmp(arg[iarg+1],"yes") == 0) { - binaryflag = 1; - } - iarg += 2; - } else if (strcmp(arg[iarg],"file") == 0) { - if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); - filename = arg[iarg + 1]; - file_flag = 1; - iarg += 2; - } else if (strcmp(arg[iarg],"fold") == 0) { - if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); - if (strcmp(arg[iarg+1],"yes") == 0) { - folded = 1; - } else if (strcmp(arg[iarg+1],"no") == 0) { - folded = 0; - } else error->all(FLERR,"Illegal input for dynamical_matrix fold option"); - iarg += 2; - } else error->all(FLERR,"Illegal dynamical_matrix command"); - } - if (file_flag == 1) { - openfile(filename); - } + if (narg < 0) error->all(FLERR,"Illegal 
dynamical_matrix command"); + int iarg = 0; + const char* filename = "dynmat.dyn"; + while (iarg < narg) { + if (strcmp(arg[iarg],"binary") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); + if (strcmp(arg[iarg+1],"gzip") == 0) { + compressed = 1; + } else if (strcmp(arg[iarg+1],"yes") == 0) { + binaryflag = 1; + } + iarg += 2; + } else if (strcmp(arg[iarg],"file") == 0) { + if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); + filename = arg[iarg + 1]; + file_flag = 1; + iarg += 2; + } else if (strcmp(arg[iarg],"fold") == 0) { + if (iarg+2 > narg) error->all(FLERR, "Illegal dynamical_matrix command"); + if (strcmp(arg[iarg+1],"yes") == 0) { + folded = 1; + } else if (strcmp(arg[iarg+1],"no") == 0) { + folded = 0; + } else error->all(FLERR,"Illegal input for dynamical_matrix fold option"); + iarg += 2; + } else error->all(FLERR,"Illegal dynamical_matrix command"); + } + if (file_flag == 1) { + openfile(filename); + } } /* ---------------------------------------------------------------------- @@ -211,12 +211,12 @@ void DynamicalMatrix::options(int narg, char **arg) void DynamicalMatrix::openfile(const char* filename) { - // if file already opened, return - if (file_opened) return; + // if file already opened, return + if (file_opened) return; - if (compressed) { + if (compressed) { #ifdef LAMMPS_GZIP - char gzip[128]; + char gzip[128]; sprintf(gzip,"gzip -6 > %s",filename); #ifdef _WIN32 fp = _popen(gzip,"wb"); @@ -224,17 +224,17 @@ void DynamicalMatrix::openfile(const char* filename) fp = popen(gzip,"w"); #endif #else - error->one(FLERR,"Cannot open gzipped file"); + error->one(FLERR,"Cannot open gzipped file"); #endif - } else if (binaryflag) { - fp = fopen(filename,"wb"); - } else { - fp = fopen(filename,"w"); - } + } else if (binaryflag) { + fp = fopen(filename,"wb"); + } else { + fp = fopen(filename,"w"); + } - if (fp == nullptr) error->one(FLERR,"Cannot open dump file"); + if (fp == nullptr) 
error->one(FLERR,"Cannot open dump file"); - file_opened = 1; + file_opened = 1; } /* ---------------------------------------------------------------------- @@ -243,112 +243,112 @@ void DynamicalMatrix::openfile(const char* filename) void DynamicalMatrix::calculateMatrix() { - int local_idx; // local index - int local_jdx; // second local index - int nlocal = atom->nlocal; - bigint natoms = atom->natoms; - int *type = atom->type; - bigint *gm = groupmap; - double imass; // dynamical matrix element - double *m = atom->mass; - double **f = atom->f; + int local_idx; // local index + int local_jdx; // second local index + int nlocal = atom->nlocal; + bigint natoms = atom->natoms; + int *type = atom->type; + bigint *gm = groupmap; + double imass; // dynamical matrix element + double *m = atom->mass; + double **f = atom->f; - double **dynmat = new double*[3]; - for (int i=0; i<3; i++) - dynmat[i] = new double[dynlenb]; + double **dynmat = new double*[3]; + for (int i=0; i<3; i++) + dynmat[i] = new double[dynlenb]; - double **fdynmat = new double*[3]; - for (int i=0; i<3; i++) - fdynmat[i] = new double[dynlenb]; + double **fdynmat = new double*[3]; + for (int i=0; i<3; i++) + fdynmat[i] = new double[dynlenb]; - //initialize dynmat to all zeros + //initialize dynmat to all zeros + dynmat_clear(dynmat); + + if (comm->me == 0) { + utils::logmesg(lmp,fmt::format("Calculating Dynamical Matrix ...\n")); + utils::logmesg(lmp,fmt::format(" Total # of atoms = {}\n", natoms)); + utils::logmesg(lmp,fmt::format(" Atoms in group = {}\n", gcount)); + utils::logmesg(lmp,fmt::format(" Total dynamical matrix elements = {}\n", (dynlenb*dynlen))); + } + + // emit dynlen rows of dimalpha*dynlen*dimbeta elements + + update->nsteps = 0; + int prog = 0; + for (bigint i=1; i<=natoms; i++) { + local_idx = atom->map(i); + if (gm[i-1] < 0) + continue; + for (int alpha=0; alpha<3; alpha++) { + displace_atom(local_idx, alpha, 1); + update_force(); + for (bigint j=1; j<=natoms; j++) { + local_jdx = 
atom->map(j); + if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal + && (gm[j-1] >= 0 || folded)){ + if (folded) { + for (int beta=0; beta<3; beta++){ + dynmat[alpha][(j-1)*3+beta] -= f[local_jdx][beta]; + } + } else { + for (int beta=0; beta<3; beta++){ + dynmat[alpha][gm[j-1]*3+beta] -= f[local_jdx][beta]; + } + } + } + } + displace_atom(local_idx,alpha,-2); + update_force(); + for (bigint j=1; j<=natoms; j++) { + local_jdx = atom->map(j); + if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal + && (gm[j-1] >= 0 || folded)){ + if (atom->rmass_flag == 1) + imass = sqrt(m[local_idx] * m[local_jdx]); + else + imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); + if (folded){ + for (int beta=0; beta<3; beta++){ + dynmat[alpha][(j-1)*3+beta] -= -f[local_jdx][beta]; + dynmat[alpha][(j-1)*3+beta] /= (2 * del * imass); + dynmat[alpha][(j-1)*3+beta] *= conversion; + } + } else { + for (int beta=0; beta<3; beta++){ + dynmat[alpha][gm[j-1]*3+beta] -= -f[local_jdx][beta]; + dynmat[alpha][gm[j-1]*3+beta] /= (2 * del * imass); + dynmat[alpha][gm[j-1]*3+beta] *= conversion; + } + } + } + } + displace_atom(local_idx,alpha,1); + } + for (int k=0; k<3; k++) + MPI_Reduce(dynmat[k],fdynmat[k],dynlenb,MPI_DOUBLE,MPI_SUM,0,world); + if (comm->me == 0) + writeMatrix(fdynmat); dynmat_clear(dynmat); - - if (comm->me == 0) { - utils::logmesg(lmp,fmt::format("Calculating Dynamical Matrix ...\n")); - utils::logmesg(lmp,fmt::format(" Total # of atoms = {}\n", natoms)); - utils::logmesg(lmp,fmt::format(" Atoms in group = {}\n", gcount)); - utils::logmesg(lmp,fmt::format(" Total dynamical matrix elements = {}\n", (dynlenb*dynlen))); + if (comm->me == 0 && screen) { + int p = 10 * gm[i-1] / gcount; + if (p > prog) { + prog = p; + fprintf(screen," %d%%",p*10); + fflush(screen); + } } + } + if (comm->me == 0 && screen) fprintf(screen,"\n"); - // emit dynlen rows of dimalpha*dynlen*dimbeta elements + for (int i=0; i < 3; i++) + delete [] dynmat[i]; + delete [] dynmat; - 
update->nsteps = 0; - int prog = 0; - for (bigint i=1; i<=natoms; i++) { - local_idx = atom->map(i); - if (gm[i-1] < 0) - continue; - for (int alpha=0; alpha<3; alpha++) { - displace_atom(local_idx, alpha, 1); - update_force(); - for (bigint j=1; j<=natoms; j++) { - local_jdx = atom->map(j); - if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal - && (gm[j-1] >= 0 || folded)){ - if (folded) { - for (int beta=0; beta<3; beta++){ - dynmat[alpha][(j-1)*3+beta] -= f[local_jdx][beta]; - } - } else { - for (int beta=0; beta<3; beta++){ - dynmat[alpha][gm[j-1]*3+beta] -= f[local_jdx][beta]; - } - } - } - } - displace_atom(local_idx,alpha,-2); - update_force(); - for (bigint j=1; j<=natoms; j++) { - local_jdx = atom->map(j); - if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal - && (gm[j-1] >= 0 || folded)){ - if (atom->rmass_flag == 1) - imass = sqrt(m[local_idx] * m[local_jdx]); - else - imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); - if (folded){ - for (int beta=0; beta<3; beta++){ - dynmat[alpha][(j-1)*3+beta] -= -f[local_jdx][beta]; - dynmat[alpha][(j-1)*3+beta] /= (2 * del * imass); - dynmat[alpha][(j-1)*3+beta] *= conversion; - } - } else { - for (int beta=0; beta<3; beta++){ - dynmat[alpha][gm[j-1]*3+beta] -= -f[local_jdx][beta]; - dynmat[alpha][gm[j-1]*3+beta] /= (2 * del * imass); - dynmat[alpha][gm[j-1]*3+beta] *= conversion; - } - } - } - } - displace_atom(local_idx,alpha,1); - } - for (int k=0; k<3; k++) - MPI_Reduce(dynmat[k],fdynmat[k],dynlenb,MPI_DOUBLE,MPI_SUM,0,world); - if (comm->me == 0) - writeMatrix(fdynmat); - dynmat_clear(dynmat); - if (comm->me == 0 && screen) { - int p = 10 * gm[i-1] / gcount; - if (p > prog) { - prog = p; - fprintf(screen," %d%%",p*10); - fflush(screen); - } - } - } - if (comm->me == 0 && screen) fprintf(screen,"\n"); + for (int i=0; i < 3; i++) + delete [] fdynmat[i]; + delete [] fdynmat; - for (int i=0; i < 3; i++) - delete [] dynmat[i]; - delete [] dynmat; - - for (int i=0; i < 3; i++) - delete [] 
fdynmat[i]; - delete [] fdynmat; - - if (screen && comm->me ==0) fprintf(screen,"Finished Calculating Dynamical Matrix\n"); + if (screen && comm->me ==0) fprintf(screen,"Finished Calculating Dynamical Matrix\n"); } /* ---------------------------------------------------------------------- @@ -357,26 +357,26 @@ void DynamicalMatrix::calculateMatrix() void DynamicalMatrix::writeMatrix(double **dynmat) { - if (comm->me != 0 || !fp) - return; + if (comm->me != 0 || !fp) + return; - clearerr(fp); - if (binaryflag) { - for (int i=0; i<3; i++) - fwrite(dynmat[i], sizeof(double), dynlenb, fp); - if (ferror(fp)) - error->one(FLERR, "Error writing to binary file"); - } else { - for (int i = 0; i < 3; i++) { - for (bigint j = 0; j < dynlenb; j++) { - fmt::print(fp, "{:.8f}", dynmat[i][j]); - if ((j+1)%3==0) fputs("\n",fp); - else fputs(" ",fp); - } - } - if (ferror(fp)) - error->one(FLERR,"Error writing to file"); + clearerr(fp); + if (binaryflag) { + for (int i=0; i<3; i++) + fwrite(dynmat[i], sizeof(double), dynlenb, fp); + if (ferror(fp)) + error->one(FLERR, "Error writing to binary file"); + } else { + for (int i = 0; i < 3; i++) { + for (bigint j = 0; j < dynlenb; j++) { + fmt::print(fp, "{:.8f}", dynmat[i][j]); + if ((j+1)%3==0) fputs("\n",fp); + else fputs(" ",fp); + } } + if (ferror(fp)) + error->one(FLERR,"Error writing to file"); + } } /* ---------------------------------------------------------------------- @@ -385,18 +385,18 @@ void DynamicalMatrix::writeMatrix(double **dynmat) void DynamicalMatrix::displace_atom(int local_idx, int direction, int magnitude) { - if (local_idx < 0) return; + if (local_idx < 0) return; - double **x = atom->x; - int *sametag = atom->sametag; - int j = local_idx; + double **x = atom->x; + int *sametag = atom->sametag; + int j = local_idx; - x[local_idx][direction] += del*magnitude; + x[local_idx][direction] += del*magnitude; - while (sametag[j] >= 0) { - j = sametag[j]; - x[j][direction] += del*magnitude; - } + while (sametag[j] >= 0) { 
+ j = sametag[j]; + x[j][direction] += del*magnitude; + } } @@ -410,35 +410,35 @@ void DynamicalMatrix::displace_atom(int local_idx, int direction, int magnitude) void DynamicalMatrix::update_force() { - force_clear(); - int n_post_force = modify->n_post_force; + force_clear(); + int n_post_force = modify->n_post_force; - if (pair_compute_flag) { - force->pair->compute(eflag,vflag); - timer->stamp(Timer::PAIR); - } - if (atom->molecular != Atom::ATOMIC) { - if (force->bond) force->bond->compute(eflag,vflag); - if (force->angle) force->angle->compute(eflag,vflag); - if (force->dihedral) force->dihedral->compute(eflag,vflag); - if (force->improper) force->improper->compute(eflag,vflag); - timer->stamp(Timer::BOND); - } - if (kspace_compute_flag) { - force->kspace->compute(eflag,vflag); - timer->stamp(Timer::KSPACE); - } - if (force->newton) { - comm->reverse_comm(); - timer->stamp(Timer::COMM); - } + if (pair_compute_flag) { + force->pair->compute(eflag,vflag); + timer->stamp(Timer::PAIR); + } + if (atom->molecular != Atom::ATOMIC) { + if (force->bond) force->bond->compute(eflag,vflag); + if (force->angle) force->angle->compute(eflag,vflag); + if (force->dihedral) force->dihedral->compute(eflag,vflag); + if (force->improper) force->improper->compute(eflag,vflag); + timer->stamp(Timer::BOND); + } + if (kspace_compute_flag) { + force->kspace->compute(eflag,vflag); + timer->stamp(Timer::KSPACE); + } + if (force->newton) { + comm->reverse_comm(); + timer->stamp(Timer::COMM); + } - // force modifications + // force modifications - if (n_post_force) modify->post_force(vflag); - timer->stamp(Timer::MODIFY); + if (n_post_force) modify->post_force(vflag); + timer->stamp(Timer::MODIFY); - ++ update->nsteps; + ++ update->nsteps; } /* ---------------------------------------------------------------------- @@ -448,17 +448,17 @@ void DynamicalMatrix::update_force() void DynamicalMatrix::force_clear() { - if (external_force_clear) return; + if (external_force_clear) return; - // 
clear global force array - // if either newton flag is set, also include ghosts + // clear global force array + // if either newton flag is set, also include ghosts - size_t nbytes = sizeof(double) * atom->nlocal; - if (force->newton) nbytes += sizeof(double) * atom->nghost; + size_t nbytes = sizeof(double) * atom->nlocal; + if (force->newton) nbytes += sizeof(double) * atom->nghost; - if (nbytes) { - memset(&atom->f[0][0],0,3*nbytes); - } + if (nbytes) { + memset(&atom->f[0][0],0,3*nbytes); + } } /* ---------------------------------------------------------------------- @@ -480,54 +480,54 @@ void DynamicalMatrix::dynmat_clear(double **dynmat) void DynamicalMatrix::convert_units(const char *style) { - // physical constants from: - // https://physics.nist.gov/cuu/Constants/Table/allascii.txt - // using thermochemical calorie = 4.184 J + // physical constants from: + // https://physics.nist.gov/cuu/Constants/Table/allascii.txt + // using thermochemical calorie = 4.184 J - if (strcmp(style,"lj") == 0) { - error->all(FLERR,"Conversion Not Set"); - //conversion = 1; // lj -> 10 J/mol + if (strcmp(style,"lj") == 0) { + error->all(FLERR,"Conversion Not Set"); + //conversion = 1; // lj -> 10 J/mol - } else if (strcmp(style,"real") == 0) { - conv_energy = 418.4; // kcal/mol -> 10 J/mol - conv_mass = 1; // g/mol -> g/mol - conv_distance = 1; // angstrom -> angstrom + } else if (strcmp(style,"real") == 0) { + conv_energy = 418.4; // kcal/mol -> 10 J/mol + conv_mass = 1; // g/mol -> g/mol + conv_distance = 1; // angstrom -> angstrom - } else if (strcmp(style,"metal") == 0) { - conv_energy = 9648.5; // eV -> 10 J/mol - conv_mass = 1; // g/mol -> g/mol - conv_distance = 1; // angstrom -> angstrom + } else if (strcmp(style,"metal") == 0) { + conv_energy = 9648.5; // eV -> 10 J/mol + conv_mass = 1; // g/mol -> g/mol + conv_distance = 1; // angstrom -> angstrom - } else if (strcmp(style,"si") == 0) { - if (comm->me) error->warning(FLERR,"Conversion Warning: Multiplication by Large 
Float"); - conv_energy = 6.022E22; // J -> 10 J/mol - conv_mass = 6.022E26; // kg -> g/mol - conv_distance = 1E-10; // meter -> angstrom + } else if (strcmp(style,"si") == 0) { + if (comm->me) error->warning(FLERR,"Conversion Warning: Multiplication by Large Float"); + conv_energy = 6.022E22; // J -> 10 J/mol + conv_mass = 6.022E26; // kg -> g/mol + conv_distance = 1E-10; // meter -> angstrom - } else if (strcmp(style,"cgs") == 0) { - if (comm->me) error->warning(FLERR,"Conversion Warning: Multiplication by Large Float"); - conv_energy = 6.022E12; // Erg -> 10 J/mol - conv_mass = 6.022E23; // g -> g/mol - conv_distance = 1E-7; // centimeter -> angstrom + } else if (strcmp(style,"cgs") == 0) { + if (comm->me) error->warning(FLERR,"Conversion Warning: Multiplication by Large Float"); + conv_energy = 6.022E12; // Erg -> 10 J/mol + conv_mass = 6.022E23; // g -> g/mol + conv_distance = 1E-7; // centimeter -> angstrom - } else if (strcmp(style,"electron") == 0) { - conv_energy = 262550; // Hartree -> 10 J/mol - conv_mass = 1; // amu -> g/mol - conv_distance = 0.529177249; // bohr -> angstrom + } else if (strcmp(style,"electron") == 0) { + conv_energy = 262550; // Hartree -> 10 J/mol + conv_mass = 1; // amu -> g/mol + conv_distance = 0.529177249; // bohr -> angstrom - } else if (strcmp(style,"micro") == 0) { - if (comm->me) error->warning(FLERR,"Conversion Warning: Untested Conversion"); - conv_energy = 6.022E10; // picogram-micrometer^2/microsecond^2 -> 10 J/mol - conv_mass = 6.022E11; // pg -> g/mol - conv_distance = 1E-4; // micrometer -> angstrom + } else if (strcmp(style,"micro") == 0) { + if (comm->me) error->warning(FLERR,"Conversion Warning: Untested Conversion"); + conv_energy = 6.022E10; // picogram-micrometer^2/microsecond^2 -> 10 J/mol + conv_mass = 6.022E11; // pg -> g/mol + conv_distance = 1E-4; // micrometer -> angstrom - } else if (strcmp(style,"nano") == 0) { - if (comm->me) error->warning(FLERR,"Conversion Warning: Untested Conversion"); - conv_energy = 
6.022E4; // attogram-nanometer^2/nanosecond^2 -> 10 J/mol - conv_mass = 6.022E5; // ag -> g/mol - conv_distance = 0.1; // angstrom -> angstrom + } else if (strcmp(style,"nano") == 0) { + if (comm->me) error->warning(FLERR,"Conversion Warning: Untested Conversion"); + conv_energy = 6.022E4; // attogram-nanometer^2/nanosecond^2 -> 10 J/mol + conv_mass = 6.022E5; // ag -> g/mol + conv_distance = 0.1; // angstrom -> angstrom - } else error->all(FLERR,"Units Type Conversion Not Found"); + } else error->all(FLERR,"Units Type Conversion Not Found"); } @@ -535,66 +535,66 @@ void DynamicalMatrix::convert_units(const char *style) void DynamicalMatrix::create_groupmap() { - //Create a group map which maps atom order onto group - // groupmap[global atom index-1] = output column/row + //Create a group map which maps atom order onto group + // groupmap[global atom index-1] = output column/row - int local_idx; // local index - int gid = 0; //group index - int nlocal = atom->nlocal; - int *mask = atom->mask; - bigint natoms = atom->natoms; - int *recv = new int[comm->nprocs]; - int *displs = new int[comm->nprocs]; - bigint *temp_groupmap = new bigint[natoms]; + int local_idx; // local index + int gid = 0; //group index + int nlocal = atom->nlocal; + int *mask = atom->mask; + bigint natoms = atom->natoms; + int *recv = new int[comm->nprocs]; + int *displs = new int[comm->nprocs]; + bigint *temp_groupmap = new bigint[natoms]; - //find number of local atoms in the group (final_gid) - for (bigint i=1; i<=natoms; i++) { - local_idx = atom->map(i); - if ((local_idx >= 0) && (local_idx < nlocal) && mask[local_idx] & groupbit) - gid += 1; // gid at the end of loop is final_Gid + //find number of local atoms in the group (final_gid) + for (bigint i=1; i<=natoms; i++) { + local_idx = atom->map(i); + if ((local_idx >= 0) && (local_idx < nlocal) && mask[local_idx] & groupbit) + gid += 1; // gid at the end of loop is final_Gid + } + //create an array of length final_gid + bigint *sub_groupmap 
= new bigint[gid]; + + gid = 0; + //create a map between global atom id and group atom id for each proc + for (bigint i=1; i<=natoms; i++) { + local_idx = atom->map(i); + if ((local_idx >= 0) && (local_idx < nlocal) && mask[local_idx] & groupbit) { + sub_groupmap[gid] = i; + gid += 1; } - //create an array of length final_gid - bigint *sub_groupmap = new bigint[gid]; + } - gid = 0; - //create a map between global atom id and group atom id for each proc - for (bigint i=1; i<=natoms; i++) { - local_idx = atom->map(i); - if ((local_idx >= 0) && (local_idx < nlocal) && mask[local_idx] & groupbit) { - sub_groupmap[gid] = i; - gid += 1; - } - } + //populate arrays for Allgatherv + for (int i=0; i < comm->nprocs; i++) { + recv[i] = 0; + } + recv[comm->me] = gid; + MPI_Allreduce(recv,displs,comm->nprocs,MPI_INT,MPI_SUM,world); + for (int i=0; i < comm->nprocs; i++) { + recv[i]=displs[i]; + if (i>0) displs[i] = displs[i-1]+recv[i-1]; + else displs[i] = 0; + } - //populate arrays for Allgatherv - for (int i=0; i < comm->nprocs; i++) { - recv[i] = 0; - } - recv[comm->me] = gid; - MPI_Allreduce(recv,displs,comm->nprocs,MPI_INT,MPI_SUM,world); - for (int i=0; i < comm->nprocs; i++) { - recv[i]=displs[i]; - if (i>0) displs[i] = displs[i-1]+recv[i-1]; - else displs[i] = 0; - } + //combine subgroup maps into total temporary groupmap + MPI_Allgatherv(sub_groupmap,gid,MPI_LMP_BIGINT,temp_groupmap,recv,displs,MPI_LMP_BIGINT,world); + std::sort(temp_groupmap,temp_groupmap+gcount); - //combine subgroup maps into total temporary groupmap - MPI_Allgatherv(sub_groupmap,gid,MPI_LMP_BIGINT,temp_groupmap,recv,displs,MPI_LMP_BIGINT,world); - std::sort(temp_groupmap,temp_groupmap+gcount); + //populate member groupmap based on temp groupmap + bigint j = 0; + for (bigint i=1; i <= natoms; i++) { + // flag groupmap contents that are in temp_groupmap + if (j < gcount && i == temp_groupmap[j]) + groupmap[i-1] = j++; + else + groupmap[i-1] = -1; + } - //populate member groupmap based on temp 
groupmap - bigint j = 0; - for (bigint i=1; i <= natoms; i++) { - // flag groupmap contents that are in temp_groupmap - if (j < gcount && i == temp_groupmap[j]) - groupmap[i-1] = j++; - else - groupmap[i-1] = -1; - } - - //free that memory! - delete[] recv; - delete[] displs; - delete[] sub_groupmap; - delete[] temp_groupmap; + //free that memory! + delete[] recv; + delete[] displs; + delete[] sub_groupmap; + delete[] temp_groupmap; } diff --git a/src/USER-PHONON/third_order.cpp b/src/USER-PHONON/third_order.cpp index 010924982d..c43d3a5fb2 100644 --- a/src/USER-PHONON/third_order.cpp +++ b/src/USER-PHONON/third_order.cpp @@ -423,8 +423,8 @@ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) if (folded){ for (int k = 0; k < atom->natoms; k++){ norm = square(dynmat[k*3])+ - square(dynmat[k*3+1])+ - square(dynmat[k*3+2]); + square(dynmat[k*3+1])+ + square(dynmat[k*3+2]); if (norm > 1.0e-16) fmt::print(fp,"{} {} {} {} {} {:.8f} {.8f} {.8f}\n", i+1, a + 1, j+1, b + 1, k+1, @@ -435,8 +435,8 @@ void ThirdOrder::writeMatrix(double *dynmat, bigint i, int a, bigint j, int b) } else { for (int k = 0; k < gcount; k++){ norm = square(dynmat[k*3])+ - square(dynmat[k*3+1])+ - square(dynmat[k*3+2]); + square(dynmat[k*3+1])+ + square(dynmat[k*3+2]); if (norm > 1.0e-16) fmt::print(fp, "{} {} {} {} {} {:.8f} {.8f} {.8f}\n", i+1, a + 1, j+1, b + 1, groupmap[k]+1, From 99db66d4859383bee7a68c9231f50dc74cc55967 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Tue, 22 Sep 2020 14:56:40 -0500 Subject: [PATCH 0020/1471] Add launch bounds patches to avoid SWDEV-252521 Change-Id: I389b5db94c14f1e5bc90c1848bfc0e038774ee19 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 33 +++++++++++++++--------------- src/KOKKOS/pair_tersoff_kokkos.cpp | 17 ++++++++------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 75a1448b33..4bcedb65a1 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ 
b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -753,14 +753,14 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) } else { if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } } ev_all += ev; @@ -799,12 +799,12 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) k_resize_hb.sync(); // zero - Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nmax),*this); if (neighflag == HALF) - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this); else if (neighflag == HALFTHREAD) - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this); k_resize_bo.modify(); k_resize_bo.sync(); @@ -892,15 +892,15 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) // Angular if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } else { //if (neighflag == 
HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } pvector[4] = ev.ereax[3]; @@ -911,15 +911,15 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) // Torsion if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } pvector[8] = ev.ereax[6]; @@ -1611,7 +1611,8 @@ void PairReaxCKokkos::operator()(PairReaxBuildListsHalf, const int itype = type(i); const int jnum = d_numneigh[i]; - F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3], dln_BOp_pi_i[3], dln_BOp_pi2_i[3]; + const int three = 3; + F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[three], dBOp_i[three], dln_BOp_pi_i[three], dln_BOp_pi2_i[three]; F_FLOAT total_bo = 0.0; int j_index,i_index; diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 72a79c0b49..5c828e743b 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -226,27 +226,27 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) if 
(neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } else if (neighflag == FULL) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this); ev_all += ev; } @@ -1289,4 +1289,3 @@ template class PairTersoffKokkos; template class PairTersoffKokkos; #endif } - From 5b8e138d0f6606f75c8ba4a526e806791cafafc7 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Mon, 28 Sep 2020 10:41:50 -0500 Subject: [PATCH 0021/1471] add more missing launch bounds Change-Id: Ifc4c94963b3c1c26455d32598f366af84c73883e --- src/KOKKOS/pair_eam_kokkos.cpp | 24 ++++++++++++------------ src/KOKKOS/pair_reaxc_kokkos.cpp | 8 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git 
a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 5025b4d642..08e164a6c9 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -201,41 +201,41 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } } } else { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> 
>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } } } diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 4bcedb65a1..ba44223570 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -930,15 +930,15 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) if (cut_hbsq > 0.0) { if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); ev_all += ev; } } From 51b3cb640f5c090cee6c089ec996bebc11b45d00 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Thu, 1 Oct 2020 15:56:49 -0500 Subject: [PATCH 0022/1471] Adding support to reducing number of mat lib. 
calls (-DHIP_OPT_USE_LESS_MATH) Change-Id: I44d140cf1b9421ccb3277b0592d3100b76c6eae9 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 57 ++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index ba44223570..58c180158a 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -33,7 +33,6 @@ #include - #define TEAMSIZE 128 /* ---------------------------------------------------------------------- */ @@ -1151,20 +1150,43 @@ void PairReaxCKokkos::operator()(PairReaxComputeLJCoulomb::operator()(PairReaxComputeLJCoulomb::operator()(PairReaxComputeAngular 0.0 && SBO <= 1.0) { + #ifdef HIP_OPT_USE_LESS_MATH + CSBO2 = pow( SBO, p_val9 - 1.0 ); + SBO2 = CSBO2*SBO; + CSBO2 = p_val9 * CSBO2; + #else SBO2 = pow( SBO, p_val9 ); CSBO2 = p_val9 * pow( SBO, p_val9 - 1.0 ); + #endif } else if (SBO > 1.0 && SBO < 2.0) { + #ifdef HIP_OPT_USE_LESS_MATH + CSBO2 = pow( 2.0 - SBO, p_val9 - 1.0 ); + SBO2 = 2.0 - CSBO2*(2.0 - SBO); + CSBO2 = p_val9 * CSBO2; + #else SBO2 = 2.0 - pow( 2.0-SBO, p_val9 ); CSBO2 = p_val9 * pow( 2.0 - SBO, p_val9 - 1.0 ); + #endif } else { SBO2 = 2.0; CSBO2 = 0.0; } expval6 = exp( p_val6 * d_Delta_boc[i] ); + F_FLOAT CdDelta_i = 0.0; F_FLOAT fitmp[3],fjtmp[3]; for (int j = 0; j < 3; j++) fitmp[j] = 0.0; @@ -2352,12 +2387,28 @@ void PairReaxCKokkos::operator()(PairReaxComputeAngular Date: Thu, 1 Oct 2020 18:23:41 -0500 Subject: [PATCH 0023/1471] Revised torsion kernel with preprocessing - preprocess the interaction lists to reduce divergence Change-Id: I842cc424963333308c5aaaeb202c8f41f07a55f5 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 175 +++++++++++++++++++++++++++---- src/KOKKOS/pair_reaxc_kokkos.h | 12 +++ 2 files changed, 165 insertions(+), 22 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 58c180158a..a31292623d 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ 
b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -801,9 +801,9 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy>(0,nmax),*this); if (neighflag == HALF) - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,ignum),*this); else if (neighflag == HALFTHREAD) - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,ignum),*this); k_resize_bo.modify(); k_resize_bo.sync(); @@ -891,15 +891,15 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) // Angular if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); ev_all += ev; } pvector[4] = ev.ereax[3]; @@ -910,16 +910,56 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) // Torsion if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); else - 
Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); - ev_all += ev; - } else { //if (neighflag == HALFTHREAD) { - if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); - else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); ev_all += ev; + } else { + + hipHostMalloc((void**) &counters,sizeof(int)*inum, hipHostMallocNonCoherent); + #if 1 + hipHostMalloc((void**) &counters_jj_min,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_jj_max,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_kk_min,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); + #else + hipMalloc((void**) &counters_jj_min,sizeof(int)*inum); + hipMalloc((void**) &counters_jj_max,sizeof(int)*inum); + hipMalloc((void**) &counters_kk_min,sizeof(int)*inum); + hipMalloc((void**) &counters_kk_max,sizeof(int)*inum); + #endif + + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + else{ + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + hipDeviceSynchronize(); + int nnz = 0; + for (int i = 0; i < inum; ++i){ + if (counters[i] > 0){ + counters[nnz] = i; + nnz++; + } + } + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + } + ev_all += ev; + hipDeviceSynchronize(); + + + hipHostFree(counters); + #if 1 + hipHostFree(counters_jj_min); + hipHostFree(counters_jj_max); + hipHostFree(counters_kk_min); + hipHostFree(counters_kk_max); + #else + hipFree(counters_jj_min); + hipFree(counters_jj_max); + hipFree(counters_kk_min); + hipFree(counters_kk_max); + #endif + } pvector[8] = ev.ereax[6]; pvector[9] = ev.ereax[7]; @@ -2535,6 
+2575,91 @@ void PairReaxCKokkos::operator()(PairReaxComputeAngular +template +KOKKOS_INLINE_FUNCTION +void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, const int &ii) const { + + F_FLOAT bo_ij, bo_ik, bo_jl; + //F_FLOAT fn10, f11_DiDj, dfn11, fn12; + int counter = 0; + + const int i = d_ilist[ii]; + const int itype = type(i); + const tagint itag = tag(i); + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + + const int j_start = d_bo_first[i]; + const int j_end = j_start + d_bo_num[i]; + + int jj_min = j_end+1; + int jj_max = j_start-1; + int kk_min = j_end+1; + int kk_max = j_start-1; + + for (int jj = j_start; jj < j_end; jj++) { + int j = d_bo_list[jj]; + j &= NEIGHMASK; + const tagint jtag = tag(j); + const int j_index = jj - j_start; + + // skip half of the interactions + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + + bo_ij = d_BO(i,j_index); + if (bo_ij < thb_cut) continue; + + const int l_start = d_bo_first[j]; + const int l_end = l_start + d_bo_num[j]; + + for (int kk = j_start; kk < j_end; kk++) { + int k = d_bo_list[kk]; + k &= NEIGHMASK; + if (k == j) continue; + const int k_index = kk - j_start; + bo_ik = d_BO(i,k_index); + if (bo_ik < thb_cut) continue; + + for (int ll = l_start; ll < l_end; ll++) { + int l = d_bo_list[ll]; + l &= NEIGHMASK; + if (l == i) continue; + const int l_index = ll - l_start; + + bo_jl = d_BO(j,l_index); + if (l == k || bo_jl < thb_cut || bo_ij*bo_ik*bo_jl < thb_cut) continue; + //if we got so far forces will be computed + counter++; + jj_min = jj < jj_min ? jj : jj_min; + jj_max = jj > jj_max ? jj : jj_max; + kk_min = kk < kk_min ? kk : kk_min; + kk_max = kk > kk_max ? 
kk : kk_max; + } + } + } + counters[ii] = counter; + if (counter > 0){ + counters_jj_min[ii] = jj_min; + counters_jj_max[ii] = jj_max; + counters_kk_min[ii] = kk_min; + counters_kk_max[ii] = kk_max; + } +} + + + /* ---------------------------------------------------------------------- */ template @@ -2588,7 +2713,9 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion 1.0) cos_ijk = 1.0; - if (cos_ijk < -1.0) cos_ijk = -1.0; + else if (cos_ijk < -1.0) cos_ijk = -1.0; //LG changed "if" to "else if" theta_ijk = acos(cos_ijk); // dcos_ijk const F_FLOAT inv_dists = 1.0 / (rij * rik); - const F_FLOAT cos_ijk_tmp = cos_ijk / ((rij*rik)*(rij*rik)); +// const F_FLOAT cos_ijk_tmp = cos_ijk / ((rij*rik)*(rij*rik));//LG + const F_FLOAT cos_ijk_tmp = cos_ijk *inv_dists * inv_dists; for (int d = 0; d < 3; d++) { dcos_ijk_di[d] = -(delik[d] + delij[d]) * inv_dists + cos_ijk_tmp * (rsqik * delij[d] + rsqij * delik[d]); @@ -2689,7 +2818,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion 1.0) cos_jil = 1.0; - if (cos_jil < -1.0) cos_jil = -1.0; + else if (cos_jil < -1.0) cos_jil = -1.0; //LG changed "if" to "else if" theta_jil = acos(cos_jil); // dcos_jil @@ -2750,7 +2879,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion 1.0) arg = 1.0; - if (arg < -1.0) arg = -1.0; + else if (arg < -1.0) arg = -1.0; //LG changed from "if" to "else if" F_FLOAT sin_ijk_rnd = sin_ijk; F_FLOAT sin_jil_rnd = sin_jil; @@ -2905,11 +3034,13 @@ template template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeTorsion, const int &ii) const { - EV_FLOAT_REAX ev; - this->template operator()(PairReaxComputeTorsion(), ii, ev); + + EV_FLOAT_REAX ev; + this->template operator()(PairReaxComputeTorsion(), counters[ii], ev); } + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/pair_reaxc_kokkos.h b/src/KOKKOS/pair_reaxc_kokkos.h index 
5f4ae68c42..fb7295c56b 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.h +++ b/src/KOKKOS/pair_reaxc_kokkos.h @@ -94,6 +94,10 @@ struct PairReaxComputeMulti2{}; template struct PairReaxComputeAngular{}; +//LG sorting atoms for processing in PairReaxComputeTorsion +template +struct PairReaxComputeTorsion_preview{}; + template struct PairReaxComputeTorsion{}; @@ -220,6 +224,10 @@ class PairReaxCKokkos : public PairReaxC { KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion, const int&, EV_FLOAT_REAX&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(PairReaxComputeTorsion_preview, const int&) const; + template KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion, const int&) const; @@ -340,6 +348,10 @@ class PairReaxCKokkos : public PairReaxC { void Deallocate_Lookup_Tables(); void LR_vdW_Coulomb( int i, int j, double r_ij, LR_data *lr ); + int* counters; + int *counters_jj_min, *counters_jj_max,*counters_kk_min,*counters_kk_max; + size_t counters_length; + typedef Kokkos::DualView tdual_int_1d; Kokkos::DualView k_params_sing; typename Kokkos::DualView::t_dev_const paramssing; From bcd4ab036c7b3e34cb6282bdd8431977d14d2f9d Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Mon, 5 Oct 2020 13:30:05 -0500 Subject: [PATCH 0024/1471] Implement forward device pack/unpack for fix/qeq/reax Change-Id: I690a63f7ef467d3f1b39784ac97cab54ed7fb248 --- src/KOKKOS/comm_kokkos.cpp | 79 ++++++++++++++++++++---------- src/KOKKOS/comm_kokkos.h | 10 ++++ src/KOKKOS/fix_qeq_reax_kokkos.cpp | 76 ++++++++++++++++++++++++++++ src/KOKKOS/fix_qeq_reax_kokkos.h | 9 ++++ 4 files changed, 149 insertions(+), 25 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index e3ba5467b0..133b93f50a 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -474,51 +474,46 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) MPI_Request request; DAT::tdual_xfloat_1d k_buf_tmp; - int nsize = pair->comm_forward; - KokkosBase* 
pairKKBase = dynamic_cast(pair); - - for (iswap = 0; iswap < nswap; iswap++) { - int n = MAX(max_buf_pair,nsize*sendnum[iswap]); - n = MAX(n,nsize*recvnum[iswap]); - if (n > max_buf_pair) - grow_buf_pair(n); - } + MPI_Request request; + int iswap, n; for (iswap = 0; iswap < nswap; iswap++) { // pack buffer - n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist, - iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); + n = KKBase->pack_forward_comm_kokkos(sendnum[iswap], k_sendlist, iswap, + k_buf_send_fop, pbc_flag[iswap], + pbc[iswap]); DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer if (sendproc[iswap] != me) { - double* buf_send_pair; - double* buf_recv_pair; + double *buf_send; + double *buf_recv; if (lmp->kokkos->gpu_aware_flag) { - buf_send_pair = k_buf_send_pair.view().data(); - buf_recv_pair = k_buf_recv_pair.view().data(); + buf_send = k_buf_send_fop.view().data(); + buf_recv = k_buf_recv_fop.view().data(); } else { - k_buf_send_pair.modify(); - k_buf_send_pair.sync(); - buf_send_pair = k_buf_send_pair.h_view.data(); - buf_recv_pair = k_buf_recv_pair.h_view.data(); + k_buf_send_fop.modify(); + k_buf_send_fop.sync(); + buf_send = k_buf_send_fop.h_view.data(); + buf_recv = k_buf_recv_fop.h_view.data(); } if (recvnum[iswap]) { - MPI_Irecv(buf_recv_pair,nsize*recvnum[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); + MPI_Irecv(buf_recv, nsize * recvnum[iswap], MPI_DOUBLE, recvproc[iswap], + 0, world, &request); } if (sendnum[iswap]) - MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world); - if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + MPI_Send(buf_send, n, MPI_DOUBLE, sendproc[iswap], 0, world); + if (recvnum[iswap]) + MPI_Wait(&request, MPI_STATUS_IGNORE); if (!lmp->kokkos->gpu_aware_flag) { - k_buf_recv_pair.modify(); - k_buf_recv_pair.sync(); + k_buf_recv_fop.modify(); + k_buf_recv_fop.sync(); } k_buf_tmp = k_buf_recv_pair; } else k_buf_tmp = k_buf_send_pair; @@ -530,6 
+525,40 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) } } +template +void CommKokkos::forward_comm_pair_device(Pair *pair) { + int nsize = pair->comm_forward; + KokkosBase *pairKKBase = dynamic_cast(pair); + + int iswap; + for (iswap = 0; iswap < nswap; iswap++) { + int n = MAX(max_buf_pair, nsize * sendnum[iswap]); + n = MAX(n, nsize * recvnum[iswap]); + if (n > max_buf_pair) + grow_buf_pair(n); + } + + forward_comm_device_fix_or_pair(pairKKBase, nsize, + k_buf_send_pair, k_buf_recv_pair); +} + +template void CommKokkos::forward_comm_fix_device(Fix *fix) { + + int nsize = fix->comm_forward; + KokkosBase *fixKKBase = dynamic_cast(fix); + + int iswap; + for (iswap = 0; iswap < nswap; iswap++) { + int n = MAX(max_buf_fix, nsize * sendnum[iswap]); + n = MAX(n, nsize * recvnum[iswap]); + if (n > max_buf_fix) + grow_buf_fix(n); + } + + forward_comm_device_fix_or_pair(fixKKBase, nsize, k_buf_send_fix, + k_buf_recv_fix); +} + void CommKokkos::grow_buf_pair(int n) { max_buf_pair = n * BUFFACTOR; k_buf_send_pair.resize(max_buf_pair); diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index b66de5a0d0..72d8bcaed7 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -16,6 +16,7 @@ #include "comm_brick.h" #include "kokkos_type.h" +#include "kokkos_base.h" namespace LAMMPS_NS { @@ -82,6 +83,11 @@ class CommKokkos : public CommBrick { void grow_buf_pair(int); void grow_buf_fix(int); + int max_buf_fix; + DAT::tdual_xfloat_1d k_buf_send_fix; + DAT::tdual_xfloat_1d k_buf_recv_fix; + void grow_buf_fix(int); + void grow_send(int, int); void grow_recv(int); void grow_send_kokkos(int, int, ExecutionSpace space = Host); @@ -89,6 +95,10 @@ class CommKokkos : public CommBrick { void grow_list(int, int); void grow_swap(int); void copy_swap_info(); + template + void forward_comm_device_fix_or_pair(KokkosBase *, int, + DAT::tdual_xfloat_1d &, + DAT::tdual_xfloat_1d &); }; } diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp 
b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 7e4d99e1c3..5e2094533f 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -372,6 +372,9 @@ void FixQEqReaxKokkos::allocate_array() k_d = DAT::tdual_ffloat_1d("qeq/kk:d",nmax); d_d = k_d.template view(); h_d = k_d.h_view; + + d_q = atomKK->k_q.template view(); + h_d = atomKK->k_q.h_view; } // init_storage @@ -1420,6 +1423,45 @@ int FixQEqReaxKokkos::pack_forward_comm(int n, int *list, double *bu return n; } +/* ---------------------------------------------------------------------- */ + +template +int FixQEqReaxKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, + int iswap_in, DAT::tdual_xfloat_1d &buf, + int /*pbc_flag*/, int * /*pbc*/) +{ + d_sendlist = k_sendlist.view(); + iswap = iswap_in; + v_buf = buf.view(); + if (pack_flag == 1) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); + else if (pack_flag == 2) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); + else if (pack_flag == 3) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); + else if (pack_flag == 4) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); + return n; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxKokkos::operator()(TagFixQEQPackFwdComm, const int &i) const { + int j = d_sendlist(iswap, i); + if (PACKFLAG == 1) + v_buf[i] = d_d[j]; + else if( PACKFLAG == 2 ) + v_buf[i] = d_s[j]; + else if( PACKFLAG == 3 ) + v_buf[i] = d_t[j]; + else if( PACKFLAG == 4 ) + v_buf[i] = d_q[j]; +} + + /* ---------------------------------------------------------------------- */ template @@ -1448,6 +1490,40 @@ void FixQEqReaxKokkos::unpack_forward_comm(int n, int first, double /* ---------------------------------------------------------------------- */ +template +void FixQEqReaxKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) +{ + first = first_in; + v_buf = 
buf.view(); + + if (pack_flag == 1) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); + else if (pack_flag == 2) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); + else if (pack_flag == 3) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); + else if (pack_flag == 4) + Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxKokkos::operator()(TagFixQEQUnpackFwdComm, const int &i) const { + if (PACKFLAG == 1) + d_d[i + first] = v_buf[i]; + else if( PACKFLAG == 2 ) + d_s[i + first] = v_buf[i]; + else if( PACKFLAG == 3 ) + d_t[i + first] = v_buf[i]; + else if( PACKFLAG == 4 ) + d_q[i + first] = v_buf[i]; +} + +/* ---------------------------------------------------------------------- */ + template int FixQEqReaxKokkos::pack_reverse_comm(int n, int first, double *buf) { diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index e0205ce801..3e4220bfd1 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -22,6 +22,7 @@ FixStyle(qeq/reax/kk/host,FixQEqReaxKokkos) #ifndef LMP_FIX_QEQ_REAX_KOKKOS_H #define LMP_FIX_QEQ_REAX_KOKKOS_H +#include "kokkos_base.h" #include "fix_qeq_reax.h" #include "kokkos_type.h" #include "neigh_list.h" @@ -103,6 +104,14 @@ class FixQEqReaxKokkos : public FixQEqReax, public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagZeroQGhosts, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagFixQEQPackFwdComm, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagFixQEQUnpackFwdComm, const int&) const; + KOKKOS_INLINE_FUNCTION void vecsum2_item(int) const; From 705d3148791f18814462f37c40d047c68917c0da Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Fri, 2 Oct 2020 14:32:01 -0400 Subject: [PATCH 0025/1471] revised version of TagPairTersoffComputeHalf; requires defining 
define USE_COMBINED_JJ_LLOPS_TERSOFF Change-Id: I45717fb9af210ef8bedffc2d228b60a7ec0368ef --- src/KOKKOS/pair_tersoff_kokkos.cpp | 46 +++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 5c828e743b..4e4c49d480 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -320,6 +320,7 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeShortNeigh, /* ---------------------------------------------------------------------- */ + template template KOKKOS_INLINE_FUNCTION @@ -349,6 +350,9 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf::operator()(TagPairTersoffComputeHalftemplate ev_tally(ev,i,j,eng,frep,delx,dely,delz); } } - +#endif // attractive: bond order for (int jj = 0; jj < jnum; jj++) { @@ -486,6 +490,46 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeHalftemplate v_tally3(ev,i,j,k,fj,fk,delrij,delrik); } } + + #ifdef HIP_OPT_COMBINED_JJ_LLOPS_TERSOFF + + const tagint jtag = tag(j); + bool CONTINUE_FLAG=false; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) CONTINUE_FLAG=true; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) CONTINUE_FLAG=true; + } else { + if (x(j,2) < ztmp) CONTINUE_FLAG=true; + if (x(j,2) == ztmp && x(j,1) < ytmp) CONTINUE_FLAG=true; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) CONTINUE_FLAG=true; + } + if (CONTINUE_FLAG != true){ + const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,rij); + const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,rij); + const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * rij); + const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * + (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / rij; + const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + + f_x += delx1*frep; + fj_x -= delx1*frep; + + f_y += dely1*frep; + fj_y -= dely1*frep; + + f_z += delz1*frep; + fj_z -= 
delz1*frep; + + if (EVFLAG) { + if (eflag) ev.evdwl += eng; + if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,eng,frep,delx1,dely1,delz1); + } + } + + #endif + a_f(j,0) += fj_x; a_f(j,1) += fj_y; a_f(j,2) += fj_z; From 3b3ef7e9a514b3e66f3763c2119b9e2f806628fe Mon Sep 17 00:00:00 2001 From: Sriranjani Sitaraman Date: Fri, 2 Oct 2020 17:57:30 -0400 Subject: [PATCH 0026/1471] Minor optimization of tersoff kernel TagPairTersoffComputeShortNeigh Change-Id: If70e524b6bf30d3bcdacf732cf3aab897e2843d4 --- src/KOKKOS/pair_tersoff_kokkos.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 4e4c49d480..020e2bbfe3 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -298,6 +298,7 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeShortNeigh, const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); + const F_FLOAT cutmax_sq = cutmax*cutmax; const int jnum = d_numneigh[i]; int inside = 0; @@ -310,7 +311,7 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeShortNeigh, const X_FLOAT delz = ztmp - x(j,2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq < cutmax*cutmax) { + if (rsq < cutmax_sq) { d_neighbors_short(i,inside) = j; inside++; } From 44d036f4861099da0218e4ce6adfa4f9087e3959 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Wed, 7 Oct 2020 15:32:53 -0500 Subject: [PATCH 0027/1471] fixing a bug in Torsion preview Change-Id: I8f119d695f5ea3d9faa27843f87e54d3e0e086e9 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 37 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index a31292623d..635a3f8eac 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -1122,8 +1122,8 @@ void 
PairReaxCKokkos::operator()(PairReaxComputeLJCoulomb::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); - auto a_f = v_f.template access::value>(); + const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access::value>(); F_FLOAT powr_vdw, powgi_vdw, fn13, dfn13, exp1, exp2, etmp; F_FLOAT evdwl, fvdwl; @@ -1297,8 +1297,8 @@ void PairReaxCKokkos::operator()(PairReaxComputeTabulatedLJCoulomb::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); - auto a_f = v_f.template access::value>(); + const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access::value>(); const int i = d_ilist[ii]; const X_FLOAT xtmp = x(i,0); @@ -1660,11 +1660,11 @@ void PairReaxCKokkos::operator()(PairReaxBuildListsHalf, if (d_resize_bo() || d_resize_hb()) return; - auto v_dDeltap_self = ScatterViewHelper::value,decltype(dup_dDeltap_self),decltype(ndup_dDeltap_self)>::get(dup_dDeltap_self,ndup_dDeltap_self); - auto a_dDeltap_self = v_dDeltap_self.template access::value>(); + const auto v_dDeltap_self = ScatterViewHelper::value,decltype(dup_dDeltap_self),decltype(ndup_dDeltap_self)>::get(dup_dDeltap_self,ndup_dDeltap_self); + const auto a_dDeltap_self = v_dDeltap_self.template access::value>(); - auto v_total_bo = ScatterViewHelper::value,decltype(dup_total_bo),decltype(ndup_total_bo)>::get(dup_total_bo,ndup_total_bo); - auto a_total_bo = v_total_bo.template access::value>(); + const auto v_total_bo = ScatterViewHelper::value,decltype(dup_total_bo),decltype(ndup_total_bo)>::get(dup_total_bo,ndup_total_bo); + const auto a_total_bo = v_total_bo.template access::value>(); const int i = d_ilist[ii]; const X_FLOAT xtmp = x(i,0); @@ -2643,9 +2643,9 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview jj_max ? jj : jj_max; + jj_max = jj >= jj_max ? (jj+1) : jj_max; kk_min = kk < kk_min ? kk : kk_min; - kk_max = kk > kk_max ? 
kk : kk_max; + kk_max = kk >= kk_max ? (kk+1) : kk_max; } } } @@ -2667,11 +2667,11 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeTorsion, const int &ii, EV_FLOAT_REAX& ev) const { - auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); - auto a_f = v_f.template access::value>(); + const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access::value>(); - auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); - auto a_CdDelta = v_CdDelta.template access::value>(); + const auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); + const auto a_CdDelta = v_CdDelta.template access::value>(); Kokkos::View::value,Kokkos::MemoryTraits::value> > a_Cdbo = d_Cdbo; //auto a_Cdbo = dup_Cdbo.template access::value>(); @@ -2698,6 +2698,11 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion Date: Wed, 7 Oct 2020 14:50:59 -0500 Subject: [PATCH 0028/1471] test templated workaround for reduce bug Change-Id: I858e0446c453b1d194b8c653d388c14aec450c63 --- src/KOKKOS/kokkos_type.h | 119 ++++++++++++++------------------------- 1 file changed, 41 insertions(+), 78 deletions(-) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index a00c43bd2d..fb0b4b7e50 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -374,149 +374,112 @@ typedef float E_FLOAT; typedef double E_FLOAT; #endif +template struct s_EV_FLOAT { E_FLOAT evdwl; E_FLOAT ecoul; - E_FLOAT v[6]; + E_FLOAT v[vsize]; KOKKOS_INLINE_FUNCTION s_EV_FLOAT() { evdwl = 0; ecoul = 0; - v[0] = 0; v[1] = 0; v[2] = 0; - v[3] = 0; v[4] = 0; v[5] = 0; + for (int i = 0; i < vsize; ++i) + v[i] = 0; } KOKKOS_INLINE_FUNCTION void operator+=(const s_EV_FLOAT &rhs) { evdwl 
+= rhs.evdwl; ecoul += rhs.ecoul; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; + for (int i = 0; i < vsize; ++i) + v[i] += rhs.v[i]; } KOKKOS_INLINE_FUNCTION void operator+=(const volatile s_EV_FLOAT &rhs) volatile { evdwl += rhs.evdwl; ecoul += rhs.ecoul; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; + for (int i = 0; i < vsize; ++i) + v[i] += rhs.v[i]; } }; -typedef struct s_EV_FLOAT EV_FLOAT; +typedef struct s_EV_FLOAT<6> EV_FLOAT; +template struct s_EV_FLOAT_REAX { E_FLOAT evdwl; E_FLOAT ecoul; - E_FLOAT v[6]; - E_FLOAT ereax[9]; + E_FLOAT v[vsize]; + E_FLOAT ereax[esize]; KOKKOS_INLINE_FUNCTION s_EV_FLOAT_REAX() { evdwl = 0; ecoul = 0; - v[0] = 0; v[1] = 0; v[2] = 0; - v[3] = 0; v[4] = 0; v[5] = 0; - ereax[0] = 0; ereax[1] = 0; ereax[2] = 0; - ereax[3] = 0; ereax[4] = 0; ereax[5] = 0; - ereax[6] = 0; ereax[7] = 0; ereax[8] = 0; + for (int i = 0; i < vsize; ++i) + v[i] = 0; + for (int i = 0; i < esize; ++i) + ereax[i] = 0; } KOKKOS_INLINE_FUNCTION void operator+=(const s_EV_FLOAT_REAX &rhs) { evdwl += rhs.evdwl; ecoul += rhs.ecoul; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; - ereax[0] += rhs.ereax[0]; - ereax[1] += rhs.ereax[1]; - ereax[2] += rhs.ereax[2]; - ereax[3] += rhs.ereax[3]; - ereax[4] += rhs.ereax[4]; - ereax[5] += rhs.ereax[5]; - ereax[6] += rhs.ereax[6]; - ereax[7] += rhs.ereax[7]; - ereax[8] += rhs.ereax[8]; + for (int i = 0; i < vsize; ++i) + v[i] += rhs.v[i]; + for (int i = 0; i < esize; ++i) + ereax[i] += rhs.ereax[i]; } KOKKOS_INLINE_FUNCTION void operator+=(const volatile s_EV_FLOAT_REAX &rhs) volatile { evdwl += rhs.evdwl; ecoul += rhs.ecoul; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; - ereax[0] += rhs.ereax[0]; - ereax[1] += rhs.ereax[1]; - 
ereax[2] += rhs.ereax[2]; - ereax[3] += rhs.ereax[3]; - ereax[4] += rhs.ereax[4]; - ereax[5] += rhs.ereax[5]; - ereax[6] += rhs.ereax[6]; - ereax[7] += rhs.ereax[7]; - ereax[8] += rhs.ereax[8]; + for (int i = 0; i < vsize; ++i) + v[i] += rhs.v[i]; + for (int i = 0; i < esize; ++i) + ereax[i] += rhs.ereax[i]; } }; -typedef struct s_EV_FLOAT_REAX EV_FLOAT_REAX; +typedef struct s_EV_FLOAT_REAX<6,9> EV_FLOAT_REAX; +template struct s_FEV_FLOAT { - F_FLOAT f[3]; + F_FLOAT f[fsize]; E_FLOAT evdwl; E_FLOAT ecoul; - E_FLOAT v[6]; + E_FLOAT v[vsize]; KOKKOS_INLINE_FUNCTION s_FEV_FLOAT() { - f[0] = 0; f[1] = 0; f[2] = 0; evdwl = 0; ecoul = 0; - v[0] = 0; v[1] = 0; v[2] = 0; - v[3] = 0; v[4] = 0; v[5] = 0; + for (int i = 0; i < vsize; ++i) + v[i] = 0; + for (int i = 0; i < fsize; ++i) + f[i] = 0; } KOKKOS_INLINE_FUNCTION void operator+=(const s_FEV_FLOAT &rhs) { - f[0] += rhs.f[0]; - f[1] += rhs.f[1]; - f[2] += rhs.f[2]; evdwl += rhs.evdwl; ecoul += rhs.ecoul; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; + for (int i = 0; i < vsize; ++i) + v[i] += rhs.v[i]; + for (int i = 0; i < fsize; ++i) + f[i] += rhs.f[i]; } KOKKOS_INLINE_FUNCTION void operator+=(const volatile s_FEV_FLOAT &rhs) volatile { - f[0] += rhs.f[0]; - f[1] += rhs.f[1]; - f[2] += rhs.f[2]; evdwl += rhs.evdwl; ecoul += rhs.ecoul; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; + for (int i = 0; i < vsize; ++i) + v[i] += rhs.v[i]; + for (int i = 0; i < fsize; ++i) + f[i] += rhs.f[i]; } }; -typedef struct s_FEV_FLOAT FEV_FLOAT; +typedef struct s_FEV_FLOAT<6,3> FEV_FLOAT; #ifndef PREC_POS #define PREC_POS PRECISION From e2de1c5e8fa9efff1b47a93f1583eca8504ed507 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Thu, 8 Oct 2020 14:54:59 -0500 Subject: [PATCH 0029/1471] Make the torsion preview optional - enabled with -DHIP_OPT_TORSION_PREVIEW - reuse hiphostmalloced' memory 
when possible Change-Id: I25fbe7b06cffe1b6047048cd71f78bbf54d73774 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 103 ++++++++++++++++++------------- src/KOKKOS/pair_reaxc_kokkos.h | 4 +- 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 635a3f8eac..50b186fc1d 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -63,6 +63,12 @@ PairReaxCKokkos::PairReaxCKokkos(LAMMPS *lmp) : PairReaxC(lmp) nmax = 0; maxbo = 1; maxhb = 1; + inum_store=-1; + counters = nullptr; + counters_jj_min = nullptr; + counters_jj_max = nullptr; + counters_kk_min = nullptr; + counters_kk_max = nullptr; k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); k_nbuf_local = DAT::tdual_int_scalar("pair:nbuf_local"); @@ -93,6 +99,29 @@ PairReaxCKokkos::~PairReaxCKokkos() k_LR.h_view(i,j).d_CEclmb = decltype(k_LR.h_view(i,j).d_CEclmb)(); } } + + #ifdef HIP_OPT_TORSION_PREVIEW + if (counters != nullptr) { + hipHostFree(counters); + counters = nullptr; + } + if (counters_jj_min != nullptr) { + hipHostFree(counters_jj_min); + counters_jj_min = nullptr; + } + if (counters_jj_max != nullptr) { + hipHostFree(counters_jj_max); + counters_jj_max = nullptr; + } + if (counters_kk_min != nullptr) { + hipHostFree(counters_kk_min); + counters_kk_min = nullptr; + } + if (counters_kk_max != nullptr) { + hipHostFree(counters_kk_max); + counters_kk_max = nullptr; + } + #endif } /* ---------------------------------------------------------------------- */ @@ -916,50 +945,38 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) ev_all += ev; } else { - hipHostMalloc((void**) &counters,sizeof(int)*inum, hipHostMallocNonCoherent); - #if 1 - hipHostMalloc((void**) &counters_jj_min,sizeof(int)*inum, hipHostMallocNonCoherent); - hipHostMalloc((void**) &counters_jj_max,sizeof(int)*inum, hipHostMallocNonCoherent); - hipHostMalloc((void**) &counters_kk_min,sizeof(int)*inum, hipHostMallocNonCoherent); - 
hipHostMalloc((void**) &counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); - #else - hipMalloc((void**) &counters_jj_min,sizeof(int)*inum); - hipMalloc((void**) &counters_jj_max,sizeof(int)*inum); - hipMalloc((void**) &counters_kk_min,sizeof(int)*inum); - hipMalloc((void**) &counters_kk_max,sizeof(int)*inum); - #endif - - if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); - else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); - hipDeviceSynchronize(); - int nnz = 0; - for (int i = 0; i < inum; ++i){ - if (counters[i] > 0){ - counters[nnz] = i; - nnz++; - } - } - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); - } - ev_all += ev; + #ifdef HIP_OPT_TORSION_PREVIEW + if (inum > inum_store) { + inum_store = inum; + // realloc host arrays + hipHostMalloc((void**) &counters,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_jj_min,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_jj_max,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_kk_min,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); + } + if (evflag) { + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + } else{ + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); hipDeviceSynchronize(); - - - hipHostFree(counters); - #if 1 - hipHostFree(counters_jj_min); - hipHostFree(counters_jj_max); - hipHostFree(counters_kk_min); - hipHostFree(counters_kk_max); - #else - hipFree(counters_jj_min); - hipFree(counters_jj_max); - hipFree(counters_kk_min); - hipFree(counters_kk_max); - #endif - + int nnz = 0; + for (int i = 0; i < inum; ++i){ + if (counters[i] > 0){ + counters[nnz] = i; + nnz++; + } + } + 
Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + } + #else + if (evflag) { + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + } else{ + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + } + #endif + ev_all += ev; } pvector[8] = ev.ereax[6]; pvector[9] = ev.ereax[7]; diff --git a/src/KOKKOS/pair_reaxc_kokkos.h b/src/KOKKOS/pair_reaxc_kokkos.h index fb7295c56b..b8540ebfe7 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.h +++ b/src/KOKKOS/pair_reaxc_kokkos.h @@ -348,9 +348,9 @@ class PairReaxCKokkos : public PairReaxC { void Deallocate_Lookup_Tables(); void LR_vdW_Coulomb( int i, int j, double r_ij, LR_data *lr ); - int* counters; + int *counters; int *counters_jj_min, *counters_jj_max,*counters_kk_min,*counters_kk_max; - size_t counters_length; + int inum_store; typedef Kokkos::DualView tdual_int_1d; Kokkos::DualView k_params_sing; From dabb120096fb706e727a3c0d30a648500018a5ae Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Thu, 8 Oct 2020 16:40:38 -0500 Subject: [PATCH 0030/1471] fix undefined nnz w/ HIP_OPT_TORSION_PREVIEW off Change-Id: Ie11be12a98ba48b9134537b5389b2b9300dc2be8 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 50b186fc1d..ed863eb1b2 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -973,7 +973,7 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) if (evflag) { Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); } else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); } #endif ev_all += ev; From fee6d98201afc91374c34f479279bbc1d5222449 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Thu, 8 
Oct 2020 19:50:23 -0500 Subject: [PATCH 0031/1471] Catch missing HIP_OPT_TORSION_PREVIEW parts in kernel Change-Id: Ib0ed83940bcd870a9531d596422956741e4f735a --- src/KOKKOS/pair_reaxc_kokkos.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index ed863eb1b2..959b4e26e3 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -2715,12 +2715,15 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion, const int &ii) const { EV_FLOAT_REAX ev; + #if HIP_OPT_TORSION_PREVIEW this->template operator()(PairReaxComputeTorsion(), counters[ii], ev); + #else + this->template operator()(PairReaxComputeTorsion(), ii, ev); + #endif } From b0f638d937266f49a697c1c74e9c7c2a3bd0412d Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Thu, 8 Oct 2020 21:07:00 -0500 Subject: [PATCH 0032/1471] fix bug where preview counters were being used in reduce (but was iterating over inum) Change-Id: Ic60f40d5adcc6505ced83c7d5694813e161f73ac --- src/KOKKOS/pair_reaxc_kokkos.cpp | 41 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 959b4e26e3..321a1f9ade 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -955,18 +955,18 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) hipHostMalloc((void**) &counters_kk_min,sizeof(int)*inum, hipHostMallocNonCoherent); hipHostMalloc((void**) &counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); } - if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); - } else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); - hipDeviceSynchronize(); - int nnz = 0; - 
for (int i = 0; i < inum; ++i){ - if (counters[i] > 0){ - counters[nnz] = i; - nnz++; - } + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + hipDeviceSynchronize(); + int nnz = 0; + for (int i = 0; i < inum; ++i){ + if (counters[i] > 0){ + counters[nnz] = i; + nnz++; } + } + if (evflag) { + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); + } else{ Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); } #else @@ -2682,7 +2682,16 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview template KOKKOS_INLINE_FUNCTION -void PairReaxCKokkos::operator()(PairReaxComputeTorsion, const int &ii, EV_FLOAT_REAX& ev) const { +void PairReaxCKokkos::operator()(PairReaxComputeTorsion, + #ifdef HIP_OPT_TORSION_PREVIEW + const int &_ii, + #else + const int &ii, + #endif + EV_FLOAT_REAX& ev) const { + #ifdef HIP_OPT_TORSION_PREVIEW + const int ii = counters[_ii]; + #endif const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); const auto a_f = v_f.template access::value>(); @@ -2789,8 +2798,9 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion::operator()(PairReaxComputeTorsion, const int &ii) const { EV_FLOAT_REAX ev; - #if HIP_OPT_TORSION_PREVIEW - this->template operator()(PairReaxComputeTorsion(), counters[ii], ev); - #else this->template operator()(PairReaxComputeTorsion(), ii, ev); - #endif - } From 775167ecd9c524a48e7a6be1298d91494083e41f Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Fri, 9 Oct 2020 09:39:27 -0500 Subject: [PATCH 0033/1471] apply torsion preview to all cases Change-Id: Icc619a89fc4af98735c0fbbeb52d655dc60b10fd --- src/KOKKOS/pair_reaxc_kokkos.cpp | 62 +++++++++++++++----------------- src/KOKKOS/pair_reaxc_kokkos.h | 6 ++-- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 321a1f9ade..a718560b55 
100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -936,48 +936,45 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) pvector[6] = ev.ereax[5]; ev_all.evdwl += ev.ereax[3] + ev.ereax[4] + ev.ereax[5]; + #ifdef HIP_OPT_TORSION_PREVIEW + if (inum > inum_store) { + inum_store = inum; + // realloc host arrays + hipHostMalloc((void**) &counters,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_jj_min,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_jj_max,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_kk_min,sizeof(int)*inum, hipHostMallocNonCoherent); + hipHostMalloc((void**) &counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); + } + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + hipDeviceSynchronize(); + int nnz = 0; + for (int i = 0; i < inum; ++i){ + if (counters[i] > 0){ + counters[nnz] = i; + nnz++; + } + } + #else // !HIP_OPT_TORSION_PREVIEW + int nnz = inum; + #endif + // Torsion if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); ev_all += ev; } else { - - #ifdef HIP_OPT_TORSION_PREVIEW - if (inum > inum_store) { - inum_store = inum; - // realloc host arrays - hipHostMalloc((void**) &counters,sizeof(int)*inum, hipHostMallocNonCoherent); - hipHostMalloc((void**) &counters_jj_min,sizeof(int)*inum, hipHostMallocNonCoherent); - hipHostMalloc((void**) &counters_jj_max,sizeof(int)*inum, hipHostMallocNonCoherent); - hipHostMalloc((void**) &counters_kk_min,sizeof(int)*inum, hipHostMallocNonCoherent); - hipHostMalloc((void**) 
&counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); - } - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); - hipDeviceSynchronize(); - int nnz = 0; - for (int i = 0; i < inum; ++i){ - if (counters[i] > 0){ - counters[nnz] = i; - nnz++; - } - } if (evflag) { Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); } else{ Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); } - #else - if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); - } else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); - } - #endif ev_all += ev; } + pvector[8] = ev.ereax[6]; pvector[9] = ev.ereax[7]; ev_all.evdwl += ev.ereax[6] + ev.ereax[7]; @@ -2595,9 +2592,8 @@ void PairReaxCKokkos::operator()(PairReaxComputeAngular -template KOKKOS_INLINE_FUNCTION -void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, const int &ii) const { +void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, const int &ii) const { F_FLOAT bo_ij, bo_ik, bo_jl; //F_FLOAT fn10, f11_DiDj, dfn11, fn12; @@ -2684,13 +2680,13 @@ template KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeTorsion, #ifdef HIP_OPT_TORSION_PREVIEW - const int &_ii, + const int &iii, #else const int &ii, #endif EV_FLOAT_REAX& ev) const { #ifdef HIP_OPT_TORSION_PREVIEW - const int ii = counters[_ii]; + const int ii = counters[iii]; #endif const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); diff --git a/src/KOKKOS/pair_reaxc_kokkos.h b/src/KOKKOS/pair_reaxc_kokkos.h index b8540ebfe7..089cb34c65 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.h +++ b/src/KOKKOS/pair_reaxc_kokkos.h @@ -95,7 +95,6 @@ template struct PairReaxComputeAngular{}; //LG sorting atoms for processing in PairReaxComputeTorsion -template struct PairReaxComputeTorsion_preview{}; 
template @@ -223,10 +222,9 @@ class PairReaxCKokkos : public PairReaxC { template KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion, const int&, EV_FLOAT_REAX&) const; - - template + KOKKOS_INLINE_FUNCTION - void operator()(PairReaxComputeTorsion_preview, const int&) const; + void operator()(PairReaxComputeTorsion_preview, const int&) const; template KOKKOS_INLINE_FUNCTION From 397b098ac0275059321f7e0ac9a8f5bad16a5212 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Mon, 12 Oct 2020 15:41:37 -0500 Subject: [PATCH 0034/1471] fix memory leak in counter allocs Change-Id: Ia45e3e5d1ce92b4fd537a031181bf719a243a8b1 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index a718560b55..762f3ee2a3 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -938,6 +938,13 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) #ifdef HIP_OPT_TORSION_PREVIEW if (inum > inum_store) { + if (counters != nullptr) { + hipHostFree(counters); + hipHostFree(counters_jj_min); + hipHostFree(counters_jj_max); + hipHostFree(counters_kk_min); + hipHostFree(counters_kk_max); + } inum_store = inum; // realloc host arrays hipHostMalloc((void**) &counters,sizeof(int)*inum, hipHostMallocNonCoherent); From c6efc01a7fe17705c75b17593db1e85165e9fab7 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Tue, 13 Oct 2020 17:45:43 -0400 Subject: [PATCH 0035/1471] Revised Torsion kernel with -DHIP_PRINTF_WORKAROUND Change-Id: Iffee0dfbd507f1a0b873a86a8b1c71ffcb5b344e --- src/KOKKOS/pair_reaxc_kokkos.cpp | 76 ++++++++++++++++++++++++++------ src/KOKKOS/pair_reaxc_kokkos.h | 4 +- 2 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 762f3ee2a3..7a7819823b 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -962,25 +962,41 @@ void 
PairReaxCKokkos::compute(int eflag_in, int vflag_in) nnz++; } } + if (neighflag == HALF) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + ev_all += ev; + } else { + if (evflag) { + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); + } else{ + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + } + ev_all += ev; + } + + #else // !HIP_OPT_TORSION_PREVIEW - int nnz = inum; - #endif + // Torsion if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); ev_all += ev; } else { if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); } else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); } ev_all += ev; } + #endif pvector[8] = ev.ereax[6]; pvector[9] = ev.ereax[7]; @@ -2651,7 +2667,13 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con const int k_index = kk - j_start; bo_ik = d_BO(i,k_index); if (bo_ik < thb_cut) continue; - +#if 1 + counter++; + jj_min = jj < jj_min ? jj : jj_min; + jj_max = jj >= jj_max ? (jj+1) : jj_max; + kk_min = kk < kk_min ? kk : kk_min; + kk_max = kk >= kk_max ? 
(kk+1) : kk_max; +#else for (int ll = l_start; ll < l_end; ll++) { int l = d_bo_list[ll]; l &= NEIGHMASK; @@ -2667,10 +2689,24 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con kk_min = kk < kk_min ? kk : kk_min; kk_max = kk >= kk_max ? (kk+1) : kk_max; } +#endif } } counters[ii] = counter; if (counter > 0){ + //LG should not be needed ... test and consider removing later + // if (jj_min > j_end) jj_min=j_end; + // if (jj_max > j_end) jj_max=j_end; + + // if (kk_min > j_end) kk_min=j_end; + // if (kk_max > j_end) kk_max=j_end; + + // if (jj_min < j_start) jj_min=j_start; + // if (jj_max < j_start) jj_max=j_start; + + // if (kk_min < j_start) kk_min=j_start; + // if (kk_max < j_start) kk_max=j_start; + counters_jj_min[ii] = jj_min; counters_jj_max[ii] = jj_max; counters_kk_min[ii] = kk_min; @@ -2685,15 +2721,12 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con template template KOKKOS_INLINE_FUNCTION -void PairReaxCKokkos::operator()(PairReaxComputeTorsion, - #ifdef HIP_OPT_TORSION_PREVIEW - const int &iii, - #else - const int &ii, - #endif - EV_FLOAT_REAX& ev) const { +void PairReaxCKokkos::operator()(PairReaxComputeTorsion, const int &iii, EV_FLOAT_REAX& ev) const { + #ifdef HIP_OPT_TORSION_PREVIEW const int ii = counters[iii]; + #else + const int ii = iii; #endif const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); @@ -3073,6 +3106,20 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion @@ -3221,6 +3268,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeHydrogen diff --git a/src/KOKKOS/pair_reaxc_kokkos.h b/src/KOKKOS/pair_reaxc_kokkos.h index 089cb34c65..42a55224c9 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.h +++ b/src/KOKKOS/pair_reaxc_kokkos.h @@ -11,6 +11,8 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#include +#include #ifdef PAIR_CLASS @@ -222,7 +224,7 @@ class PairReaxCKokkos : public PairReaxC { template KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion, const int&, EV_FLOAT_REAX&) const; - + KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion_preview, const int&) const; From 20dc9576d9771ec117cd0d9768b98d8d3aa50d00 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Thu, 8 Oct 2020 09:31:04 -0400 Subject: [PATCH 0036/1471] updated PairReaxComputeLJCoulomb. use -DPairReaxComputeLJCoulomb_BLOCKING to activate Change-Id: Idef8dafc79216368bffbf90547016afca5f32a43 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 176 ++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 7a7819823b..a77aa4f477 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -781,14 +781,14 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) } else { if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this); } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this); } } ev_all += ev; @@ -1177,6 +1177,173 @@ void 
PairReaxCKokkos::operator()(PairReaxComputeLJCoulomb= nlocal) { + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) FLAG_CONTINUE=true; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) FLAG_CONTINUE=true; + } else { + if (x(j,2) < ztmp) FLAG_CONTINUE=true; + else if (x(j,2) == ztmp && x(j,1) < ytmp) FLAG_CONTINUE=true; + else if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) FLAG_CONTINUE=true; + } + } + if (FLAG_CONTINUE==false){ + const X_FLOAT delx = x(j,0) - xtmp; + const X_FLOAT dely = x(j,1) - ytmp; + const X_FLOAT delz = x(j,2) - ztmp; + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + if (rsq > cut_nbsq) FLAG_CONTINUE=true; + } + if (FLAG_CONTINUE == false){ + selected_jj[nnz] = jj_current; + nnz++; + } + jj_current++; + if (jj_current == jnum) break; + } + + for (int jj_inner = 0; jj_inner < nnz; jj_inner++){ + const int jj = selected_jj[jj_inner]; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const int jtype = type(j); + //const tagint jtag = tag(j); + const F_FLOAT qj = q(j); + + const X_FLOAT delx = x(j,0) - xtmp; + const X_FLOAT dely = x(j,1) - ytmp; + const X_FLOAT delz = x(j,2) - ztmp; + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + //if (rsq > cut_nbsq) continue; + const F_FLOAT rij = sqrt(rsq); + + // LJ energy/force + F_FLOAT Tap = d_tap[7] * rij + d_tap[6]; + Tap = Tap * rij + d_tap[5]; + Tap = Tap * rij + d_tap[4]; + Tap = Tap * rij + d_tap[3]; + Tap = Tap * rij + d_tap[2]; + Tap = Tap * rij + d_tap[1]; + Tap = Tap * rij + d_tap[0]; + + F_FLOAT dTap = 7*d_tap[7] * rij + 6*d_tap[6]; + dTap = dTap * rij + 5*d_tap[5]; + dTap = dTap * rij + 4*d_tap[4]; + dTap = dTap * rij + 3*d_tap[3]; + dTap = dTap * rij + 2*d_tap[2]; + dTap += d_tap[1]/rij; + + const F_FLOAT gamma_w = paramstwbp(itype,jtype).gamma_w; + const F_FLOAT alpha = paramstwbp(itype,jtype).alpha; + const F_FLOAT r_vdw = paramstwbp(itype,jtype).r_vdw; + const F_FLOAT epsilon = paramstwbp(itype,jtype).epsilon; + + + // shielding + if (vdwflag == 1 || vdwflag 
== 3) { + #ifdef HIP_OPT_USE_LESS_MATH + F_FLOAT tmp_var; + tmp_var = pow(rij,gp[28]-2.0); + powr_vdw = tmp_var*rij*rij; + powgi_vdw = pow(1.0/gamma_w,gp[28]); + dfn13 = pow(powr_vdw+powgi_vdw,1.0/gp[28]-1.0); + fn13 = dfn13*(powr_vdw+powgi_vdw); + dfn13 = dfn13*tmp_var; + + exp2 = exp(0.5*alpha*(1.0-fn13/r_vdw)); + exp1 = exp2*exp2; + #else + powr_vdw = pow(rij,gp[28]); + powgi_vdw = pow(1.0/gamma_w,gp[28]); + + fn13 = pow(powr_vdw+powgi_vdw,1.0/gp[28]); + + exp1 = exp(alpha*(1.0-fn13/r_vdw)); + exp2 = exp(0.5*alpha*(1.0-fn13/r_vdw)); + + dfn13 = pow(powr_vdw+powgi_vdw,1.0/gp[28]-1.0)*pow(rij,gp[28]-2.0); + #endif + + etmp = epsilon*(exp1-2.0*exp2); + evdwl = Tap*etmp; + fvdwl = dTap*etmp-Tap*epsilon*(alpha/r_vdw)*(exp1-exp2)*dfn13; + } else { + #ifdef HIP_OPT_USE_LESS_MATH + exp2 = exp(0.5*alpha*(1.0-rij/r_vdw)); + exp1 = exp2*exp2; + #else + exp1 = exp(alpha*(1.0-rij/r_vdw)); + exp2 = exp(0.5*alpha*(1.0-rij/r_vdw)); + #endif + etmp = epsilon*(exp1-2.0*exp2); + evdwl = Tap*etmp; + fvdwl = dTap*etmp-Tap*epsilon*(alpha/r_vdw)*(exp1-exp2)*rij; + } + // inner wall + if (vdwflag == 2 || vdwflag == 3) { + const F_FLOAT ecore = paramstwbp(itype,jtype).ecore; + const F_FLOAT acore = paramstwbp(itype,jtype).acore; + const F_FLOAT rcore = paramstwbp(itype,jtype).rcore; + const F_FLOAT e_core = ecore*exp(acore*(1.0-(rij/rcore))); + const F_FLOAT de_core = -(acore/rcore)*e_core; + evdwl += Tap*e_core; + fvdwl += dTap*e_core+Tap*de_core/rij; + + if (lgflag) { + const F_FLOAT lgre = paramstwbp(itype,jtype).lgre; + const F_FLOAT lgcij = paramstwbp(itype,jtype).lgcij; + const F_FLOAT rij5 = rsq*rsq*rij; + const F_FLOAT rij6 = rij5*rij; + const F_FLOAT re6 = lgre*lgre*lgre*lgre*lgre*lgre; + const F_FLOAT elg = -lgcij/(rij6+re6); + const F_FLOAT delg = -6.0*elg*rij5/(rij6+re6); + evdwl += Tap*elg; + fvdwl += dTap*elg+Tap*delg/rij; + } + } + + // Coulomb energy/force + const F_FLOAT shld = paramstwbp(itype,jtype).gamma; + const F_FLOAT denom1 = rij * rij * rij + shld; + const 
F_FLOAT denom3 = pow(denom1,0.3333333333333); + const F_FLOAT ecoul = C_ele * qi*qj*Tap/denom3; + const F_FLOAT fcoul = C_ele * qi*qj*(dTap-Tap*rij/denom1)/denom3; + + const F_FLOAT ftotal = fvdwl + fcoul; + fxtmp += delx*ftotal; + a_f(j,0) -= delx*ftotal; + fytmp += dely*ftotal; + a_f(j,1) -= dely*ftotal; + fztmp += delz*ftotal; + a_f(j,2) -= delz*ftotal; + + if (eflag) ev.evdwl += evdwl; + if (eflag) ev.ecoul += ecoul; + + if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,evdwl+ecoul,-ftotal,delx,dely,delz); + } + } + +#else for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); @@ -1311,6 +1478,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeLJCoulombtemplate ev_tally(ev,i,j,evdwl+ecoul,-ftotal,delx,dely,delz); } +#endif a_f(i,0) += fxtmp; a_f(i,1) += fytmp; From 117995c2fc60506fb5f3e070a9b32cc98cff0067 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Thu, 8 Oct 2020 14:14:48 -0400 Subject: [PATCH 0037/1471] updated PairReaxBuildListsHalf use -DPairReaxBuildListsHalf_BLOCKING to activate Change-Id: I8d1ec5ed9ec7042e9cdc53da825c07d3bc44e2bd --- src/KOKKOS/pair_reaxc_kokkos.cpp | 215 +++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index a77aa4f477..3348aec717 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -1898,6 +1898,219 @@ void PairReaxCKokkos::operator()(PairReaxBuildListsHalf, } } + #ifdef HIP_OPT_PAIRREAXBUILDLISTSHALF_BLOCKING + + unsigned short int BLK_SZ=80; + unsigned short int nnz; + unsigned short int selected_jj[80]; + unsigned short int jj_current = 0; + + + + while (jj_current < jnum) { + nnz=0; + + while (nnz < BLK_SZ) { + int jj = jj_current; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + d_bo_first[j] = j*maxbo; + d_hb_first[j] = j*maxhb; + + delij[0] = x(j,0) - xtmp; + delij[1] = x(j,1) - ytmp; + delij[2] = x(j,2) - ztmp; + const F_FLOAT rsq = delij[0]*delij[0] + 
delij[1]*delij[1] + delij[2]*delij[2]; + + double cutoffsq; + if(i < nlocal) cutoffsq = MAX(cut_bosq,cut_hbsq); + else cutoffsq = cut_bosq; + if (rsq <= cutoffsq){ + selected_jj[nnz] = jj_current; + nnz++; + } + jj_current++; + if (jj_current == jnum) break; + } + + for (int jj_inner = 0; jj_inner < nnz; jj_inner++){ + const int jj = selected_jj[jj_inner]; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const int jtype = type(j); + delij[0] = x(j,0) - xtmp; + delij[1] = x(j,1) - ytmp; + delij[2] = x(j,2) - ztmp; + const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2]; + + double cutoffsq; + if(i < nlocal) cutoffsq = MAX(cut_bosq,cut_hbsq); + else cutoffsq = cut_bosq; + + // hbond list + if (i < nlocal && cut_hbsq > 0.0 && (ihb == 1 || ihb == 2) && rsq <= cut_hbsq) { + jhb = paramssing(jtype).p_hbond; + if (ihb == 1 && jhb == 2) { + if (NEIGHFLAG == HALF) { + j_index = hb_first_i + d_hb_num[i]; + d_hb_num[i]++; + } else { + j_index = hb_first_i + Kokkos::atomic_fetch_add(&d_hb_num[i],1); + } + + const int jj_index = j_index - hb_first_i; + + if (jj_index >= maxhb) { + d_resize_hb() = 1; + return; + } + + d_hb_list[j_index] = j; + } + else if ( j < nlocal && ihb == 2 && jhb == 1) { + if (NEIGHFLAG == HALF) { + i_index = d_hb_first[j] + d_hb_num[j]; + d_hb_num[j]++; + } else { + i_index = d_hb_first[j] + Kokkos::atomic_fetch_add(&d_hb_num[j],1); + } + + const int ii_index = i_index - d_hb_first[j]; + + if (ii_index >= maxhb) { + d_resize_hb() = 1; + return; + } + + d_hb_list[i_index] = i; + } + + } + + + if (rsq > cut_bosq) continue; + + + + // bond_list + const F_FLOAT rij = sqrt(rsq); + const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1; + const F_FLOAT p_bo2 = paramstwbp(itype,jtype).p_bo2; + const F_FLOAT p_bo3 = paramstwbp(itype,jtype).p_bo3; + const F_FLOAT p_bo4 = paramstwbp(itype,jtype).p_bo4; + const F_FLOAT p_bo5 = paramstwbp(itype,jtype).p_bo5; + const F_FLOAT p_bo6 = paramstwbp(itype,jtype).p_bo6; + const F_FLOAT r_s = 
paramstwbp(itype,jtype).r_s; + const F_FLOAT r_pi = paramstwbp(itype,jtype).r_pi; + const F_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2; + + if (paramssing(itype).r_s > 0.0 && paramssing(jtype).r_s > 0.0) { + C12 = p_bo1*pow(rij/r_s,p_bo2); + BO_s = (1.0+bo_cut)*exp(C12); + } + else BO_s = C12 = 0.0; + + if (paramssing(itype).r_pi > 0.0 && paramssing(jtype).r_pi > 0.0) { + C34 = p_bo3*pow(rij/r_pi,p_bo4); + BO_pi = exp(C34); + } + else BO_pi = C34 = 0.0; + + if (paramssing(itype).r_pi2 > 0.0 && paramssing(jtype).r_pi2 > 0.0) { + C56 = p_bo5*pow(rij/r_pi2,p_bo6); + BO_pi2 = exp(C56); + } + else BO_pi2 = C56 = 0.0; + + BO = BO_s + BO_pi + BO_pi2; + if (BO < bo_cut) continue; + + + if (NEIGHFLAG == HALF) { + j_index = bo_first_i + d_bo_num[i]; + i_index = d_bo_first[j] + d_bo_num[j]; + d_bo_num[i]++; + d_bo_num[j]++; + } + else { + j_index = bo_first_i + Kokkos::atomic_fetch_add(&d_bo_num[i],1); + i_index = d_bo_first[j] + Kokkos::atomic_fetch_add(&d_bo_num[j],1); + } + + const int jj_index = j_index - bo_first_i; + const int ii_index = i_index - d_bo_first[j]; + + if (jj_index >= maxbo || ii_index >= maxbo) { + d_resize_bo() = 1; + return; + } + + + d_bo_list[j_index] = j; + d_bo_list[i_index] = i; + + // from BondOrder1 + + d_BO(i,jj_index) = BO; + d_BO_s(i,jj_index) = BO_s; + d_BO_pi(i,jj_index) = BO_pi; + d_BO_pi2(i,jj_index) = BO_pi2; + + d_BO(j,ii_index) = BO; + d_BO_s(j,ii_index) = BO_s; + d_BO_pi(j,ii_index) = BO_pi; + d_BO_pi2(j,ii_index) = BO_pi2; + + F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij; + F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij; + F_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij; + + if (nlocal == 0) + Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0; + + for (int d = 0; d < 3; d++) dln_BOp_pi_i[d] = -(BO_pi*Cln_BOp_pi)*delij[d]; + for (int d = 0; d < 3; d++) dln_BOp_pi2_i[d] = -(BO_pi2*Cln_BOp_pi2)*delij[d]; + for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d]; + for (int d = 0; d < 3; d++) 
a_dDeltap_self(i,d) += dBOp_i[d]; + for (int d = 0; d < 3; d++) a_dDeltap_self(j,d) += -dBOp_i[d]; + + d_dln_BOp_pix(i,jj_index) = dln_BOp_pi_i[0]; + d_dln_BOp_piy(i,jj_index) = dln_BOp_pi_i[1]; + d_dln_BOp_piz(i,jj_index) = dln_BOp_pi_i[2]; + + d_dln_BOp_pix(j,ii_index) = -dln_BOp_pi_i[0]; + d_dln_BOp_piy(j,ii_index) = -dln_BOp_pi_i[1]; + d_dln_BOp_piz(j,ii_index) = -dln_BOp_pi_i[2]; + + d_dln_BOp_pi2x(i,jj_index) = dln_BOp_pi2_i[0]; + d_dln_BOp_pi2y(i,jj_index) = dln_BOp_pi2_i[1]; + d_dln_BOp_pi2z(i,jj_index) = dln_BOp_pi2_i[2]; + + d_dln_BOp_pi2x(j,ii_index) = -dln_BOp_pi2_i[0]; + d_dln_BOp_pi2y(j,ii_index) = -dln_BOp_pi2_i[1]; + d_dln_BOp_pi2z(j,ii_index) = -dln_BOp_pi2_i[2]; + + d_dBOpx(i,jj_index) = dBOp_i[0]; + d_dBOpy(i,jj_index) = dBOp_i[1]; + d_dBOpz(i,jj_index) = dBOp_i[2]; + + d_dBOpx(j,ii_index) = -dBOp_i[0]; + d_dBOpy(j,ii_index) = -dBOp_i[1]; + d_dBOpz(j,ii_index) = -dBOp_i[2]; + + d_BO(i,jj_index) -= bo_cut; + d_BO(j,ii_index) -= bo_cut; + d_BO_s(i,jj_index) -= bo_cut; + d_BO_s(j,ii_index) -= bo_cut; + total_bo += d_BO(i,jj_index); + a_total_bo[j] += d_BO(j,ii_index); + } + } + + + + #else for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); j &= NEIGHMASK; @@ -2066,6 +2279,8 @@ void PairReaxCKokkos::operator()(PairReaxBuildListsHalf, total_bo += d_BO(i,jj_index); a_total_bo[j] += d_BO(j,ii_index); } + #endif + a_total_bo[i] += total_bo; } From 33346ae961b9cfbf54ae254812d72715fe498520 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Wed, 7 Oct 2020 15:37:18 -0500 Subject: [PATCH 0038/1471] change teamsize Change-Id: Ie7666e8ec8192fced098fe744f318aaf25365f5a --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 5e2094533f..34bc85d364 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -732,7 +732,7 @@ void FixQEqReaxKokkos::cg_solve1() int teamsize; if (execution_space == 
Host) teamsize = 1; - else teamsize = 128; + else teamsize = 64;//128 // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse12Functor sparse12_functor(this); From 4d9c2a97d2f0f1cedbe3379d5cab470345255434 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Thu, 8 Oct 2020 16:52:36 -0400 Subject: [PATCH 0039/1471] replacing pow(a,1/3) with the cbrt(a) call - optimization is activated using the existing HIP_OPT_USE_LESS_MATH flag Change-Id: Ic6670c54f453127e948036e5c9a88154b5af6e68 --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 4 ++++ src/KOKKOS/pair_reaxc_kokkos.cpp | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 34bc85d364..d5ca6b2909 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -697,7 +697,11 @@ double FixQEqReaxKokkos::calculate_H_k(const F_FLOAT &r, const F_FLO taper = taper * r + d_tap[0]; denom = r * r * r + shld; + #ifdef HIP_OPT_USE_LESS_MATH + denom = cbrt(denom); + #else denom = pow(denom,0.3333333333333); + #endif return taper * EV_TO_KCAL_PER_MOL / denom; } diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 3348aec717..7797915992 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -680,8 +680,11 @@ void PairReaxCKokkos::LR_vdW_Coulomb( int i, int j, double r_ij, LR_ /* Coulomb calculations */ dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma ); + #ifdef HIP_OPT_USE_LESS_MATH + dr3gamij_3 = cbrt(dr3gamij_1); + #else dr3gamij_3 = pow( dr3gamij_1 , 0.33333333333333 ); - + #endif tmp = Tap / dr3gamij_3; lr->H = EV_to_KCALpMOL * tmp; lr->e_ele = C_ele * tmp; @@ -1461,7 +1464,11 @@ void PairReaxCKokkos::operator()(PairReaxComputeLJCoulomb Date: Thu, 8 Oct 2020 18:20:02 -0400 Subject: [PATCH 0040/1471] Avoid loading data twice if itype==jtype Change-Id: I430a761555bdbd93e3ca1b298c962b75b91175aa --- src/KOKKOS/pair_eam_kokkos.cpp | 9 +++++++-- 1 file 
changed, 7 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 08e164a6c9..39bac1b78b 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -755,9 +755,14 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC Date: Fri, 9 Oct 2020 18:20:20 -0400 Subject: [PATCH 0041/1471] optimized pair_eam_kokkos. need more work to avoid large shared memory allocations Change-Id: I1c23b0e10fdfa932b17ad764cf5a1d024b43d80a --- src/KOKKOS/pair_eam_kokkos.cpp | 81 ++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 39bac1b78b..39869e4d61 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -180,9 +180,9 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) // compute kernel AB if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy,Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy,Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } if (eflag) { @@ -201,41 +201,41 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, 
Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); } } } else { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } } } @@ -637,6 +637,21 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int // 
rho = density at each atom // loop over neighbors of my atoms + +#ifdef KOKKOS_ENABLE_HIP +#ifdef __HIP_DEVICE_COMPILE__ + __shared__ F_FLOAT A[500][7]; + + for (int i = threadIdx.y; i < 500*7; i+=blockDim.y){ + int j = i%7; + int m = i/7; + A[m][j] = d_rhor_spline(0,m,j); + } + __syncthreads(); +#endif +#endif + + const int i = d_ilist[ii]; const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); @@ -663,8 +678,18 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int p -= m; p = MIN(p,1.0); const int d_type2rhor_ji = d_type2rhor(jtype,itype); - rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + - d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + #ifdef __HIP_DEVICE_COMPILE__ + if (d_type2rhor_ji == 0){ + rhotmp += ( (A[m][3]*p + A[m][4])*p + + A[m][5] )*p + A[m][6]; + } + else + rhotmp += ( (d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + #else + rhotmp += ( (d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + #endif } } @@ -706,10 +731,25 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int template template KOKKOS_INLINE_FUNCTION -void PairEAMKokkos::operator()(TagPairEAMKernelC, const int &ii, EV_FLOAT& ev) const { +void PairEAMKokkos::operator()(g, const int &ii, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + +#ifdef KOKKOS_ENABLE_HIP +#ifdef __HIP_DEVICE_COMPILE__ + __shared__ F_FLOAT A[500][7]; //LG assuming second dimension of d_z2r_spline is 500; + //need to recode to "nr+1" + for (int i = threadIdx.y; i < 500*7; i+=blockDim.y){ + int j = i%7; + int m = i/7; + A[m][j] = d_z2r_spline(0,m,j); + } + __syncthreads(); +#endif +#endif + + auto v_f = 
ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); @@ -765,16 +805,22 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC::operator()(TagPairEAMKernelCtemplate ev_tally(ev,i,j,phi,fpair,delx,dely,delz); } From c25ccfdb98795bc496a07dafb33b18cd96c82f87 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Wed, 14 Oct 2020 13:02:16 -0500 Subject: [PATCH 0042/1471] adding optimization in tersoff Change-Id: I21d2bfe5181b3395430ec4b9949894c61b61587a --- src/KOKKOS/pair_tersoff_kokkos.cpp | 109 +++++++++++++++++++++++++++-- src/KOKKOS/pair_tersoff_kokkos.h | 10 +++ 2 files changed, 115 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 020e2bbfe3..5a144009e2 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -379,8 +379,15 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf cutsq) continue; const F_FLOAT r = sqrt(rsq); + + + #ifdef HIP_OPT_MERGE_FC_K_DFC + F_FLOAT tmp_fce, tmp_fcd; + ters_fc_k_and_ters_dfc(itype,jtype,jtype,r,tmp_fce,tmp_fcd); + #else const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); + #endif const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r; @@ -437,10 +444,16 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf::operator()(TagPairTersoffComputeHalf::ters_dfc(const int &i, const int &j, /* ---------------------------------------------------------------------- */ +#ifdef HIP_OPT_MERGE_FC_K_DFC +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::ters_fc_k_and_ters_dfc(const int &i, const int &j, + const int &k, const F_FLOAT &r, double& result1, double& result2) const +{ + const F_FLOAT ters_R = paramskk(i,j,k).bigr; + const F_FLOAT ters_D = 
paramskk(i,j,k).bigd; + + if (r < ters_R-ters_D){ + result1 = 1.0; + result2 = 0.0; + return; + } + if (r > ters_R+ters_D){ + result1 = 0.0; + result2 = 0.0; + return; + } + const F_FLOAT arg = MY_PI2*(r - ters_R)/ters_D; + + //double sn, cn; + //sincos(arg, &sn, &cn); + + result1 = 0.5*(1.0 - sin(arg)); + result2 = -(MY_PI4/ters_D) * cos(arg); + return; +} +#endif + +/* ---------------------------------------------------------------------- */ + + + + template KOKKOS_INLINE_FUNCTION double PairTersoffKokkos::bondorder(const int &i, const int &j, const int &k, @@ -970,7 +1024,7 @@ double PairTersoffKokkos::ters_dbij(const int &i, const int &j, const F_FLOAT tmp = paramskk(i,j,k).beta * bo; if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5/sqrt(tmp*tmp);//*pow(tmp,-1.5); if (tmp > paramskk(i,j,k).c2) - return paramskk(i,j,k).beta * (-0.5/sqrt(tmp*tmp) * //*pow(tmp,-1.5) * + return paramskk(i,j,k).beta * (-0.5/sqrt(tmp*tmp) * //*pow(tmp,-1.5) * //LG why ro compute sqrt(tmp^2) ? 
(1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * pow(tmp,-paramskk(i,j,k).powern))); if (tmp < paramskk(i,j,k).c4) return 0.0; @@ -983,6 +1037,53 @@ double PairTersoffKokkos::ters_dbij(const int &i, const int &j, /* ---------------------------------------------------------------------- */ +#ifdef HIP_OPT_MERGE_BIJ_DBIJ +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::ters_bij_k_and_ters_dbij(const int &i, const int &j, + const int &k, const F_FLOAT &bo, double& result1, double& result2) const +{ + const F_FLOAT tmp = paramskk(i,j,k).beta * bo; + if (tmp > paramskk(i,j,k).c1){ + result1 = 1.0/sqrt(tmp); + result2 = paramskk(i,j,k).beta * -0.5/fabs(tmp);//LG replacing 0.5/sqrt(tmp*tmp) by 0.5/fabs(tmp) + return; + } + + auto prm_ijk_pn = paramskk(i,j,k).powern; + + if (tmp > paramskk(i,j,k).c2){ + auto tmp_pow_neg_prm_ijk_pn = pow(tmp,-prm_ijk_pn); + result1 = (1.0 - tmp_pow_neg_prm_ijk_pn / (2.0*prm_ijk_pn))/sqrt(tmp); + result2 = paramskk(i,j,k).beta * (-0.5/fabs(tmp) * + (1.0 - 0.5*(1.0 + 1.0/(2.0*prm_ijk_pn)) * + tmp_pow_neg_prm_ijk_pn)); + return; + } + + if (tmp < paramskk(i,j,k).c4) { + result1 = 1.0; + result2 = 0.0; + return; + } + if (tmp < paramskk(i,j,k).c3){ + auto tmp_pow_prm_ijk_pn_less_one = pow(tmp,prm_ijk_pn-1.0); + result1 = 1.0 - tmp_pow_prm_ijk_pn_less_one*tmp/(2.0*prm_ijk_pn); + result2 = -0.5*paramskk(i,j,k).beta * tmp_pow_prm_ijk_pn_less_one; + return; + } + + const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern); + result1 = pow(1.0 + tmp_n, -1.0/(2.0*prm_ijk_pn)); + result2 = -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*prm_ijk_pn)))*tmp_n / bo; +} +#endif + + + + + + template KOKKOS_INLINE_FUNCTION void PairTersoffKokkos::ters_dthb( diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h index 5f5d537ae7..d35ed8f4f3 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.h +++ b/src/KOKKOS/pair_tersoff_kokkos.h @@ -87,6 +87,11 @@ class PairTersoffKokkos : public PairTersoff { KOKKOS_INLINE_FUNCTION double ters_dfc(const 
int &i, const int &j, const int &k, const F_FLOAT &r) const; + #ifdef HIP_OPT_MERGE_FC_K_DFC + KOKKOS_INLINE_FUNCTION + void ters_fc_k_and_ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r, double &result1, double &result2) const; + #endif + KOKKOS_INLINE_FUNCTION double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; @@ -99,6 +104,11 @@ class PairTersoffKokkos : public PairTersoff { KOKKOS_INLINE_FUNCTION double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + #ifdef HIP_OPT_MERGE_BIJ_DBIJ + KOKKOS_INLINE_FUNCTION + void ters_bij_k_and_ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo, double &result1, double &result2) const; + #endif + KOKKOS_INLINE_FUNCTION double bondorder(const int &i, const int &j, const int &k, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, From ce96898331b1923d4d1e75739be3953ae129011e Mon Sep 17 00:00:00 2001 From: Sriranjani Sitaraman Date: Wed, 21 Oct 2020 22:03:34 -0400 Subject: [PATCH 0043/1471] Add CUDA variants for hip calls Change-Id: I5d169ee1563c915c0768675d2ec2e7a1774bc969 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 7797915992..238ed6bd8f 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -102,23 +102,43 @@ PairReaxCKokkos::~PairReaxCKokkos() #ifdef HIP_OPT_TORSION_PREVIEW if (counters != nullptr) { +#ifdef KOKKOS_ENABLE_CUDA + cudaFreeHost(counters); +#else hipHostFree(counters); +#endif counters = nullptr; } if (counters_jj_min != nullptr) { +#ifdef KOKKOS_ENABLE_CUDA + cudaFreeHost(counters_jj_min); +#else hipHostFree(counters_jj_min); +#endif counters_jj_min = nullptr; } if (counters_jj_max != nullptr) { +#ifdef KOKKOS_ENABLE_CUDA + cudaFreeHost(counters_jj_max); +#else hipHostFree(counters_jj_max); +#endif 
counters_jj_max = nullptr; } if (counters_kk_min != nullptr) { +#ifdef KOKKOS_ENABLE_CUDA + cudaFreeHost(counters_kk_min); +#else hipHostFree(counters_kk_min); +#endif counters_kk_min = nullptr; } if (counters_kk_max != nullptr) { +#ifdef KOKKOS_ENABLE_CUDA + cudaFreeHost(counters_kk_max); +#else hipHostFree(counters_kk_max); +#endif counters_kk_max = nullptr; } #endif @@ -942,22 +962,42 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) #ifdef HIP_OPT_TORSION_PREVIEW if (inum > inum_store) { if (counters != nullptr) { +#ifdef KOKKOS_ENABLE_CUDA + cudaFreeHost(counters); + cudaFreeHost(counters_jj_min); + cudaFreeHost(counters_jj_max); + cudaFreeHost(counters_kk_min); + cudaFreeHost(counters_kk_max); +#else hipHostFree(counters); hipHostFree(counters_jj_min); hipHostFree(counters_jj_max); hipHostFree(counters_kk_min); hipHostFree(counters_kk_max); +#endif } inum_store = inum; // realloc host arrays +#ifdef KOKKOS_ENABLE_CUDA + cudaMallocHost((void**) &counters,sizeof(int)*inum); + cudaMallocHost((void**) &counters_jj_min,sizeof(int)*inum); + cudaMallocHost((void**) &counters_jj_max,sizeof(int)*inum); + cudaMallocHost((void**) &counters_kk_min,sizeof(int)*inum); + cudaMallocHost((void**) &counters_kk_max,sizeof(int)*inum); +#else hipHostMalloc((void**) &counters,sizeof(int)*inum, hipHostMallocNonCoherent); hipHostMalloc((void**) &counters_jj_min,sizeof(int)*inum, hipHostMallocNonCoherent); hipHostMalloc((void**) &counters_jj_max,sizeof(int)*inum, hipHostMallocNonCoherent); hipHostMalloc((void**) &counters_kk_min,sizeof(int)*inum, hipHostMallocNonCoherent); hipHostMalloc((void**) &counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); +#endif } Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); +#ifdef KOKKOS_ENABLE_CUDA + cudaDeviceSynchronize(); +#else hipDeviceSynchronize(); +#endif int nnz = 0; for (int i = 0; i < inum; ++i){ if (counters[i] > 0){ From bb1b269cf80ae1a2151c88d58368345d7c38f89a Mon Sep 17 00:00:00 2001 From: Leopold Grinberg 
Date: Wed, 21 Oct 2020 20:09:09 -0500 Subject: [PATCH 0044/1471] optimization for Matvec2 and Matvec3, need -DHIP_OPT_SPMV Change-Id: Ia40d2f27c38dd0e500cb7f8f02908be8b1f52a63 --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 57 +++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index d5ca6b2909..31ae09b471 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -736,7 +736,7 @@ void FixQEqReaxKokkos::cg_solve1() int teamsize; if (execution_space == Host) teamsize = 1; - else teamsize = 64;//128 + else teamsize = 128; // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse12Functor sparse12_functor(this); @@ -806,7 +806,12 @@ void FixQEqReaxKokkos::cg_solve1() if (need_dup) Kokkos::Experimental::contribute(d_o, dup_o); } else { +#ifndef HIP_OPT_SPMV Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); +#else + int teamsize = 1024/64; + Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, 64), *this); +#endif } if (neighflag != FULL) { @@ -866,7 +871,7 @@ void FixQEqReaxKokkos::cg_solve2() int teamsize; if (execution_space == Host) teamsize = 1; - else teamsize = 128; + else teamsize = 64; // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse32Functor sparse32_functor(this); @@ -885,7 +890,12 @@ void FixQEqReaxKokkos::cg_solve2() if (need_dup) Kokkos::Experimental::contribute(d_o, dup_o); } else { + #ifdef HIP_OPT_SPMV + int teamsize = 1024/64;//LG need to use some Kokkos function to get WARP_SIZE (max vector size) + Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, 64), *this); + #else Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); + #endif } if (neighflag != FULL) { @@ -938,7 +948,12 @@ void FixQEqReaxKokkos::cg_solve2() if (need_dup) Kokkos::Experimental::contribute(d_o, dup_o); } else { +#ifndef HIP_OPT_SPMV 
Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); +#else + int teamsize = 1024/64; //LG need to use Kokkos functionality to get max warp size + Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, 64), *this); +#endif } if (neighflag != FULL) { @@ -1113,6 +1128,23 @@ void FixQEqReaxKokkos::sparse23_item(int ii) const /* ---------------------------------------------------------------------- */ +#ifdef HIP_OPT_SPMV +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertype2 &team) const +{ + int k = team.league_rank () * team.team_size () + team.team_rank (); + const int i = d_ilist[k]; + if (mask[i] & groupbit) { + F_FLOAT doitmp; + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) { + const int j = d_jlist(jj); + doi += d_val(jj) * d_d[j]; + }, doitmp); + Kokkos::single(Kokkos::PerThread(team), [&] () {d_o[i] += doitmp; }); + } +} +#else template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertype2 &team) const @@ -1127,6 +1159,7 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertyp Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp; }); } } +#endif template KOKKOS_INLINE_FUNCTION @@ -1172,7 +1205,23 @@ void FixQEqReaxKokkos::sparse33_item(int ii) const } /* ---------------------------------------------------------------------- */ - +#ifdef HIP_OPT_SPMV +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertype3 &team) const +{ + int k = team.league_rank () * team.team_size () + team.team_rank (); + const int i = d_ilist[k]; + if (mask[i] & groupbit) { + F_FLOAT doitmp; + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) { + const int j = d_jlist(jj); + doi += d_val(jj) * d_t[j]; + }, doitmp); + 
Kokkos::single(Kokkos::PerThread(team), [&] () {d_o[i] += doitmp;}); + } +} +#else template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertype3 &team) const @@ -1187,7 +1236,7 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertyp Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp;}); } } - +#endif /* ---------------------------------------------------------------------- */ template From 6b568aff7a78183df5ae8bea4dcdcf5d520dd260 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Thu, 15 Oct 2020 09:36:35 -0500 Subject: [PATCH 0045/1471] adding merged ters_gijk and ters_dgijk, flag AMD_OPT_MERGE_GIJK_DGIJK is required Change-Id: I8d9f433ecb38f8bd3cc72d8f2bc1642f753c7270 --- src/KOKKOS/pair_tersoff_kokkos.cpp | 34 ++++++++++++++++++++++++++++++ src/KOKKOS/pair_tersoff_kokkos.h | 7 ++++++ 2 files changed, 41 insertions(+) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 5a144009e2..b3169d0586 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -975,6 +975,27 @@ double PairTersoffKokkos:: /* ---------------------------------------------------------------------- */ +#ifdef HIP_OPT_MERGE_GIJK_DGIJK +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos:: + ters_gijk_and_ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos, double& result1, double& result2) const +{ + const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c; + const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d; + const F_FLOAT hcth = paramskk(i,j,k).h - cos; + + const F_FLOAT numerator = -2.0 * ters_c * hcth; + const F_FLOAT denominator = 1.0/(ters_d + hcth*hcth); + + result1 = paramskk(i,j,k).gamma*(1.0 + ters_c/ters_d - ters_c*denominator); + result2 = paramskk(i,j,k).gamma * numerator * denominator * denominator; +} +#endif +/* ---------------------------------------------------------------------- */ + + + template 
KOKKOS_INLINE_FUNCTION double PairTersoffKokkos::ters_fa_k(const int &i, const int &j, @@ -1112,8 +1133,15 @@ void PairTersoffKokkos::ters_dthb( F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + #ifdef HIP_OPT_MERGE_FC_K_DFC + ters_fc_k_and_ters_dfc(i,j,k,rik,fc,dfc); + #else fc = ters_fc_k(i,j,k,rik); dfc = ters_dfc(i,j,k,rik); + #endif + + + const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik); if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); else tmp = param; @@ -1127,8 +1155,14 @@ void PairTersoffKokkos::ters_dthb( else dex_delr = paramskk(i,j,k).lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); + + //LG consider merging + #ifdef HIP_OPT_MERGE_GIJK_DGIJK + ters_gijk_and_ters_dgijk(i,j,k,cos,gijk,dgijk); + #else gijk = ters_gijk(i,j,k,cos); dgijk = ters_dgijk(i,j,k,cos); + #endif // from PairTersoff::costheta_d vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h index d35ed8f4f3..f6ec88f50d 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.h +++ b/src/KOKKOS/pair_tersoff_kokkos.h @@ -120,6 +120,13 @@ class PairTersoffKokkos : public PairTersoff { KOKKOS_INLINE_FUNCTION double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + #ifdef HIP_OPT_MERGE_GIJK_DGIJK + KOKKOS_INLINE_FUNCTION + void ters_gijk_and_ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos, double& gijk, double& dgijk) const; + #endif + + + KOKKOS_INLINE_FUNCTION void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, From 56a1cd12e54fdc6878a81b4df926e474e9150e2d Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Mon, 26 Oct 2020 16:59:13 -0500 Subject: [PATCH 0046/1471] fix for multi-GPU failure for reax Change-Id: Ia261ff2f1158de2bd6ac2d83a15080f6b6253128 --- 
src/KOKKOS/comm_kokkos.cpp | 81 ++++++++++------------------ src/KOKKOS/comm_kokkos.h | 8 --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 84 ++++-------------------------- src/KOKKOS/fix_qeq_reax_kokkos.h | 13 +---- 4 files changed, 39 insertions(+), 147 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 133b93f50a..c66cb31fc9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -368,7 +368,7 @@ void CommKokkos::forward_comm_fix(Fix *fix, int size) CommBrick::forward_comm_fix(fix,size); } else { k_sendlist.sync(); - forward_comm_fix_device(fix); + forward_comm_fix_device(fix, size); } } @@ -476,44 +476,51 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) MPI_Request request; - int iswap, n; + int nsize = pair->comm_forward; + KokkosBase* pairKKBase = dynamic_cast(pair); + + for (iswap = 0; iswap < nswap; iswap++) { + int n = MAX(max_buf_pair,nsize*sendnum[iswap]); + n = MAX(n,nsize*recvnum[iswap]); + if (n > max_buf_pair) + grow_buf_pair(n); + } + for (iswap = 0; iswap < nswap; iswap++) { // pack buffer - n = KKBase->pack_forward_comm_kokkos(sendnum[iswap], k_sendlist, iswap, - k_buf_send_fop, pbc_flag[iswap], - pbc[iswap]); + n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist, + iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer if (sendproc[iswap] != me) { - double *buf_send; - double *buf_recv; + double* buf_send_pair; + double* buf_recv_pair; if (lmp->kokkos->gpu_aware_flag) { - buf_send = k_buf_send_fop.view().data(); - buf_recv = k_buf_recv_fop.view().data(); + buf_send_pair = k_buf_send_pair.view().data(); + buf_recv_pair = k_buf_recv_pair.view().data(); } else { - k_buf_send_fop.modify(); - k_buf_send_fop.sync(); - buf_send = k_buf_send_fop.h_view.data(); - buf_recv = k_buf_recv_fop.h_view.data(); + k_buf_send_pair.modify(); + k_buf_send_pair.sync(); + buf_send_pair = 
k_buf_send_pair.h_view.data(); + buf_recv_pair = k_buf_recv_pair.h_view.data(); } if (recvnum[iswap]) { - MPI_Irecv(buf_recv, nsize * recvnum[iswap], MPI_DOUBLE, recvproc[iswap], - 0, world, &request); + MPI_Irecv(buf_recv_pair,nsize*recvnum[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); } if (sendnum[iswap]) - MPI_Send(buf_send, n, MPI_DOUBLE, sendproc[iswap], 0, world); - if (recvnum[iswap]) - MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world); + if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); if (!lmp->kokkos->gpu_aware_flag) { - k_buf_recv_fop.modify(); - k_buf_recv_fop.sync(); + k_buf_recv_pair.modify(); + k_buf_recv_pair.sync(); } k_buf_tmp = k_buf_recv_pair; } else k_buf_tmp = k_buf_send_pair; @@ -521,43 +528,11 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) // unpack buffer pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_tmp); + DeviceType().fence(); } } -template -void CommKokkos::forward_comm_pair_device(Pair *pair) { - int nsize = pair->comm_forward; - KokkosBase *pairKKBase = dynamic_cast(pair); - - int iswap; - for (iswap = 0; iswap < nswap; iswap++) { - int n = MAX(max_buf_pair, nsize * sendnum[iswap]); - n = MAX(n, nsize * recvnum[iswap]); - if (n > max_buf_pair) - grow_buf_pair(n); - } - - forward_comm_device_fix_or_pair(pairKKBase, nsize, - k_buf_send_pair, k_buf_recv_pair); -} - -template void CommKokkos::forward_comm_fix_device(Fix *fix) { - - int nsize = fix->comm_forward; - KokkosBase *fixKKBase = dynamic_cast(fix); - - int iswap; - for (iswap = 0; iswap < nswap; iswap++) { - int n = MAX(max_buf_fix, nsize * sendnum[iswap]); - n = MAX(n, nsize * recvnum[iswap]); - if (n > max_buf_fix) - grow_buf_fix(n); - } - - forward_comm_device_fix_or_pair(fixKKBase, nsize, k_buf_send_fix, - k_buf_recv_fix); -} void CommKokkos::grow_buf_pair(int n) { max_buf_pair = n * BUFFACTOR; diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index 
72d8bcaed7..d4281246bf 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -83,10 +83,6 @@ class CommKokkos : public CommBrick { void grow_buf_pair(int); void grow_buf_fix(int); - int max_buf_fix; - DAT::tdual_xfloat_1d k_buf_send_fix; - DAT::tdual_xfloat_1d k_buf_recv_fix; - void grow_buf_fix(int); void grow_send(int, int); void grow_recv(int); @@ -95,10 +91,6 @@ class CommKokkos : public CommBrick { void grow_list(int, int); void grow_swap(int); void copy_swap_info(); - template - void forward_comm_device_fix_or_pair(KokkosBase *, int, - DAT::tdual_xfloat_1d &, - DAT::tdual_xfloat_1d &); }; } diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 31ae09b471..e0173e9bc5 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -260,7 +260,7 @@ void FixQEqReaxKokkos::pre_force(int /*vflag*/) FixQEqReaxKokkosMatVecFunctor matvec_functor(this); Kokkos::parallel_for(inum,matvec_functor); - // comm->forward_comm_fix(this); //Dist_vector( s ); +// comm->forward_comm_fix(this); //Dist_vector( s ); pack_flag = 2; k_s.template modify(); comm->forward_comm_fix(this); @@ -372,9 +372,6 @@ void FixQEqReaxKokkos::allocate_array() k_d = DAT::tdual_ffloat_1d("qeq/kk:d",nmax); d_d = k_d.template view(); h_d = k_d.h_view; - - d_q = atomKK->k_q.template view(); - h_d = atomKK->k_q.h_view; } // init_storage @@ -783,12 +780,14 @@ void FixQEqReaxKokkos::cg_solve1() int loop; for (loop = 1; (loop < imax) && (sqrt(sig_new)/b_norm > tolerance); loop++) { - // comm->forward_comm_fix(this); //Dist_vector( d ); + +// comm->forward_comm_fix(this); //Dist_vector( d ); pack_flag = 1; k_d.template modify(); comm->forward_comm_fix(this); k_d.template sync(); + // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); @@ -891,8 +890,9 @@ void FixQEqReaxKokkos::cg_solve2() Kokkos::Experimental::contribute(d_o, dup_o); } else { #ifdef 
HIP_OPT_SPMV - int teamsize = 1024/64;//LG need to use some Kokkos function to get WARP_SIZE (max vector size) - Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, 64), *this); + int vector_length = 64; + int teamsize = 1024/vector_length;//LG need to use some Kokkos function to get WARP_SIZE (max vector size) + Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, vector_length), *this); #else Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #endif @@ -925,12 +925,13 @@ void FixQEqReaxKokkos::cg_solve2() int loop; for (loop = 1; (loop < imax) && (sqrt(sig_new)/b_norm > tolerance); loop++) { - // comm->forward_comm_fix(this); //Dist_vector( d ); + // comm->forward_comm_fix(this); //Dist_vector( d ); pack_flag = 1; k_d.template modify(); comm->forward_comm_fix(this); k_d.template sync(); + // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); @@ -1478,42 +1479,6 @@ int FixQEqReaxKokkos::pack_forward_comm(int n, int *list, double *bu /* ---------------------------------------------------------------------- */ -template -int FixQEqReaxKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, - int iswap_in, DAT::tdual_xfloat_1d &buf, - int /*pbc_flag*/, int * /*pbc*/) -{ - d_sendlist = k_sendlist.view(); - iswap = iswap_in; - v_buf = buf.view(); - if (pack_flag == 1) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); - else if (pack_flag == 2) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); - else if (pack_flag == 3) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); - else if (pack_flag == 4) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); - return n; -} - -/* ---------------------------------------------------------------------- */ - -template -template -KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator()(TagFixQEQPackFwdComm, const int &i) const { - int j = 
d_sendlist(iswap, i); - if (PACKFLAG == 1) - v_buf[i] = d_d[j]; - else if( PACKFLAG == 2 ) - v_buf[i] = d_s[j]; - else if( PACKFLAG == 3 ) - v_buf[i] = d_t[j]; - else if( PACKFLAG == 4 ) - v_buf[i] = d_q[j]; -} - /* ---------------------------------------------------------------------- */ @@ -1543,37 +1508,6 @@ void FixQEqReaxKokkos::unpack_forward_comm(int n, int first, double /* ---------------------------------------------------------------------- */ -template -void FixQEqReaxKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) -{ - first = first_in; - v_buf = buf.view(); - - if (pack_flag == 1) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); - else if (pack_flag == 2) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); - else if (pack_flag == 3) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); - else if (pack_flag == 4) - Kokkos::parallel_for(Kokkos::RangePolicy>(0,n),*this); -} - -/* ---------------------------------------------------------------------- */ - -template -template -KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator()(TagFixQEQUnpackFwdComm, const int &i) const { - if (PACKFLAG == 1) - d_d[i + first] = v_buf[i]; - else if( PACKFLAG == 2 ) - d_s[i + first] = v_buf[i]; - else if( PACKFLAG == 3 ) - d_t[i + first] = v_buf[i]; - else if( PACKFLAG == 4 ) - d_q[i + first] = v_buf[i]; -} /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index 3e4220bfd1..ec68990ecd 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -39,7 +39,8 @@ struct TagFixQEqReaxPackForwardComm {}; struct TagFixQEqReaxUnpackForwardComm {}; template -class FixQEqReaxKokkos : public FixQEqReax, public KokkosBase { + +class FixQEqReaxKokkos : public FixQEqReax { public: typedef DeviceType device_type; typedef ArrayTypes AT; @@ -104,14 +105,6 @@ class FixQEqReaxKokkos : public FixQEqReax, 
public KokkosBase { KOKKOS_INLINE_FUNCTION void operator()(TagZeroQGhosts, const int&) const; - template - KOKKOS_INLINE_FUNCTION - void operator()(TagFixQEQPackFwdComm, const int&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagFixQEQUnpackFwdComm, const int&) const; - KOKKOS_INLINE_FUNCTION void vecsum2_item(int) const; @@ -192,8 +185,6 @@ class FixQEqReaxKokkos : public FixQEqReax, public KokkosBase { typename ArrayTypes::t_tagint_1d tag; DAT::tdual_float_1d k_q; - typename AT::t_float_1d d_q; - HAT::t_float_1d h_q; typename ArrayTypes::t_neighbors_2d d_neighbors; typename ArrayTypes::t_int_1d_randomread d_ilist, d_numneigh; From 1856b669beec605adceb58af5f278b6d3772915f Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Tue, 27 Oct 2020 10:26:15 -0500 Subject: [PATCH 0047/1471] Apply patch from Stan to fix race in Reax Change-Id: I372b5951cf0f5e3343c3c6283f01cf0b8c889a56 --- src/KOKKOS/comm_kokkos.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c66cb31fc9..c79d5f5057 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -528,7 +528,6 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) // unpack buffer pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_tmp); - DeviceType().fence(); } } From e3a2eb0713e986fe6652f01ac055804f46c43f81 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Sat, 24 Oct 2020 15:01:19 -0500 Subject: [PATCH 0048/1471] merging FC_K and DFC calculation, activate with flag HIP_OPT_MERGE_FC_K_DFC Change-Id: I68440f02c79f8cc2cc4788dac2447cfa58f0b54a --- src/KOKKOS/pair_tersoff_kokkos.cpp | 38 ++++++++++++++++++++++++------ src/KOKKOS/pair_tersoff_kokkos.h | 7 ++++-- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index b3169d0586..adfd46c6e8 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ 
b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -329,8 +329,8 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); - auto a_f = v_f.template access::value>(); + const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access::value>(); const int i = d_ilist[ii]; if (i >= nlocal) return; @@ -442,8 +442,14 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf:: /* ---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION double PairTersoffKokkos::ters_fa_k(const int &i, const int &j, @@ -1020,6 +1025,28 @@ double PairTersoffKokkos::ters_dfa(const int &i, const int &j, /* ---------------------------------------------------------------------- */ + +#ifdef HIP_OPT_MERGE_FA_K_FA +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::ters_fa_k_and_ters_dfa(const int &i, const int &j, + const int &k, const F_FLOAT &r, double& result1, double& result2) const +{ + if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd){ + result1 = 0.0; + result2 = 0.0; + } + else{ + double tmp1 = paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r); + F_FLOAT fc_k, dfc; + ters_fc_k_and_ters_dfc(i,j,k,r,fc_k,dfc); + result1 = -tmp1 * fc_k; + result2 = tmp1 * (paramskk(i,j,k).lam2 * fc_k - dfc); + } +} +#endif + + template KOKKOS_INLINE_FUNCTION double PairTersoffKokkos::ters_bij_k(const int &i, const int &j, @@ -1140,8 +1167,6 @@ void PairTersoffKokkos::ters_dthb( dfc = ters_dfc(i,j,k,rik); #endif - - const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik); if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); else tmp = param; @@ -1155,8 +1180,7 @@ void PairTersoffKokkos::ters_dthb( else dex_delr = paramskk(i,j,k).lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); - - //LG consider merging + #ifdef HIP_OPT_MERGE_GIJK_DGIJK 
ters_gijk_and_ters_dgijk(i,j,k,cos,gijk,dgijk); #else diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h index f6ec88f50d..6fda9218ad 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.h +++ b/src/KOKKOS/pair_tersoff_kokkos.h @@ -98,6 +98,11 @@ class PairTersoffKokkos : public PairTersoff { KOKKOS_INLINE_FUNCTION double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + #ifdef HIP_OPT_MERGE_FA_K_FA + KOKKOS_INLINE_FUNCTION + void ters_fa_k_and_ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r, double &result1, double &result2) const; + #endif + KOKKOS_INLINE_FUNCTION double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; @@ -125,8 +130,6 @@ class PairTersoffKokkos : public PairTersoff { void ters_gijk_and_ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos, double& gijk, double& dgijk) const; #endif - - KOKKOS_INLINE_FUNCTION void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, From 20a4d295e54d69703d77cdef03ff97676e63b766 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Sun, 25 Oct 2020 10:40:26 -0500 Subject: [PATCH 0049/1471] Optimized Torsion with blocking, flag needed: HIP_OPT_TORSION_PREVIEW_BLOCKING and HIP_OPT_TORSION_PREVIEW Change-Id: I667ded58fc2c8b8253ea655e9fdf9b5cc3f737e4 --- src/KOKKOS/pair_reaxc_kokkos.cpp | 536 +++++++++++++++++++++++++++++-- src/KOKKOS/pair_reaxc_kokkos.h | 11 + 2 files changed, 526 insertions(+), 21 deletions(-) diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 238ed6bd8f..2a39c1a287 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -992,12 +992,9 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) hipHostMalloc((void**) &counters_kk_max,sizeof(int)*inum, hipHostMallocNonCoherent); #endif } - 
Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); -#ifdef KOKKOS_ENABLE_CUDA - cudaDeviceSynchronize(); -#else - hipDeviceSynchronize(); -#endif + + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::fence(); int nnz = 0; for (int i = 0; i < inum; ++i){ if (counters[i] > 0){ @@ -1005,6 +1002,22 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) nnz++; } } + #ifdef HIP_OPT_TORSION_PREVIEW_BLOCKING + if (neighflag == HALF) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,nnz),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,nnz),*this); + ev_all += ev; + } else { + if (evflag) { + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,nnz),*this,ev); + } else{ + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,nnz),*this); + } + ev_all += ev; + } + #else if (neighflag == HALF) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); @@ -1019,7 +1032,7 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) } ev_all += ev; } - + #endif #else // !HIP_OPT_TORSION_PREVIEW @@ -3049,7 +3062,6 @@ KOKKOS_INLINE_FUNCTION void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, const int &ii) const { F_FLOAT bo_ij, bo_ik, bo_jl; - //F_FLOAT fn10, f11_DiDj, dfn11, fn12; int counter = 0; const int i = d_ilist[ii]; @@ -3068,6 +3080,9 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con int kk_max = j_start-1; for (int jj = j_start; jj < j_end; jj++) { + + // j_counter1++; + int j = d_bo_list[jj]; j &= NEIGHMASK; const tagint jtag = tag(j); @@ -3087,6 +3102,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con bo_ij = d_BO(i,j_index); if (bo_ij < thb_cut) continue; + const int l_start = d_bo_first[j]; const int l_end = l_start + d_bo_num[j]; @@ -3097,6 +3113,7 @@ void 
PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con const int k_index = kk - j_start; bo_ik = d_BO(i,k_index); if (bo_ik < thb_cut) continue; + //k_counter2++; #if 1 counter++; jj_min = jj < jj_min ? jj : jj_min; @@ -3121,22 +3138,10 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con } #endif } + } counters[ii] = counter; if (counter > 0){ - //LG should not be needed ... test and consider removing later - // if (jj_min > j_end) jj_min=j_end; - // if (jj_max > j_end) jj_max=j_end; - - // if (kk_min > j_end) kk_min=j_end; - // if (kk_max > j_end) kk_max=j_end; - - // if (jj_min < j_start) jj_min=j_start; - // if (jj_max < j_start) jj_max=j_start; - - // if (kk_min < j_start) kk_min=j_start; - // if (kk_max < j_start) kk_max=j_start; - counters_jj_min[ii] = jj_min; counters_jj_max[ii] = jj_max; counters_kk_min[ii] = kk_min; @@ -3148,6 +3153,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion_preview, con /* ---------------------------------------------------------------------- */ + template template KOKKOS_INLINE_FUNCTION @@ -3552,6 +3558,486 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion +template +KOKKOS_INLINE_FUNCTION +void PairReaxCKokkos::operator()(PairReaxComputeTorsion_with_BLOCKING, const int &iii, EV_FLOAT_REAX& ev) const { + + #ifdef HIP_OPT_TORSION_PREVIEW + const int ii = counters[iii]; + #else + const int ii = iii; + #endif + + const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access::value>(); + + const auto v_CdDelta = ScatterViewHelper::value,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); + const auto a_CdDelta = v_CdDelta.template access::value>(); + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_Cdbo = d_Cdbo; + //auto a_Cdbo = dup_Cdbo.template access::value>(); + + // in reaxc_torsion_angles: j = i, k = j, i = k; + + F_FLOAT Delta_i, Delta_j, bo_ij, bo_ik, bo_jl, BOA_ij, BOA_ik, 
BOA_jl; + F_FLOAT p_tor1, p_cot1, V1, V2, V3; + F_FLOAT exp_tor2_ij, exp_tor2_ik, exp_tor2_jl, exp_tor1, exp_tor3_DiDj, exp_tor4_DiDj, exp_tor34_inv; + F_FLOAT exp_cot2_ij, exp_cot2_ik, exp_cot2_jl, fn10, f11_DiDj, dfn11, fn12; + F_FLOAT theta_ijk, theta_jil, sin_ijk, sin_jil, cos_ijk, cos_jil, tan_ijk_i, tan_jil_i; + F_FLOAT cos_omega, cos2omega, cos3omega; + F_FLOAT CV, cmn, CEtors1, CEtors2, CEtors3, CEtors4; + F_FLOAT CEtors5, CEtors6, CEtors7, CEtors8, CEtors9; + F_FLOAT Cconj, CEconj1, CEconj2, CEconj3, CEconj4, CEconj5, CEconj6; + F_FLOAT e_tor, e_con, eng_tmp; + + F_FLOAT delij[3], delik[3], deljl[3], dellk[3], delil[3], delkl[3]; + F_FLOAT fi_tmp[3], fj_tmp[3], fk_tmp[3], fl_tmp[3]; + F_FLOAT dcos_omega_di[3], dcos_omega_dj[3], dcos_omega_dk[3], dcos_omega_dl[3]; + F_FLOAT dcos_ijk_di[3], dcos_ijk_dj[3], dcos_ijk_dk[3], dcos_jil_di[3], dcos_jil_dj[3], dcos_jil_dk[3]; + + F_FLOAT p_tor2 = gp[23]; + F_FLOAT p_tor3 = gp[24]; + F_FLOAT p_tor4 = gp[25]; + F_FLOAT p_cot2 = gp[27]; + + + const int i = d_ilist[ii]; + #ifdef HIP_OPT_TORSION_PREVIEW + const int jj_start = counters_jj_min[ii]; + const int jj_stop = counters_jj_max[ii]; + const int kk_start = counters_kk_min[ii]; + const int kk_stop = counters_kk_max[ii]; + #endif + + const int itype = type(i); + const tagint itag = tag(i); + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + Delta_i = d_Delta_boc[i]; + + const int j_start = d_bo_first[i]; + const int j_end = j_start + d_bo_num[i]; + + F_FLOAT fitmp[3], fjtmp[3], fktmp[3]; + for(int j = 0; j < 3; j++) fitmp[j] = 0.0; + F_FLOAT CdDelta_i = 0.0; + + + unsigned char BLK_SZ=1; + unsigned char nnz_jj; + unsigned char selected_jj[1]; + #ifdef HIP_OPT_TORSION_PREVIEW + unsigned int jj_current = jj_start; + #else + unsigned int jj_current = j_start; + #endif + + + #ifdef HIP_OPT_TORSION_PREVIEW + while (jj_current < jj_stop) { + #else + while (jj_current < j_end) { + #endif + + nnz_jj=0; + while (nnz_jj < BLK_SZ) { + 
int jj = jj_current; + int j = d_bo_list[jj]; + j &= NEIGHMASK; + const tagint jtag = tag(j); + const int j_index = jj - j_start; + bool FLAG_CONTINUE = false; + + // skip half of the interactions + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) FLAG_CONTINUE = true; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) FLAG_CONTINUE = true; + } else { + if (x(j,2) < ztmp) FLAG_CONTINUE = true; + else if (x(j,2) == ztmp && x(j,1) < ytmp) FLAG_CONTINUE = true; + else if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) FLAG_CONTINUE = true; + } + + bo_ij = d_BO(i,j_index); + if (bo_ij < thb_cut) FLAG_CONTINUE = true; + + if (FLAG_CONTINUE == false){ + #ifdef HIP_OPT_TORSION_PREVIEW + selected_jj[nnz_jj] = jj_current-jj_start; + #else + selected_jj[nnz_jj] = jj_current-j_start; + #endif + nnz_jj++; + } + jj_current++; + #ifdef HIP_OPT_TORSION_PREVIEW + if (jj_current == jj_stop) break; + #else + if (jj_current == j_end) break; + #endif + } + + for (int jj_inner = 0; jj_inner < nnz_jj; jj_inner++){ + #ifdef HIP_OPT_TORSION_PREVIEW + const int jj = jj_start + selected_jj[jj_inner]; + #else + const int jj = j_start + selected_jj[jj_inner]; + #endif + int j = d_bo_list[jj]; + j &= NEIGHMASK; + const tagint jtag = tag(j); + const int jtype = type(j); + const int j_index = jj - j_start; + bo_ij = d_BO(i,j_index); + + + delij[0] = x(j,0) - xtmp; + delij[1] = x(j,1) - ytmp; + delij[2] = x(j,2) - ztmp; + const F_FLOAT rsqij = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2]; + const F_FLOAT rij = sqrt(rsqij); + + BOA_ij = bo_ij - thb_cut; + Delta_j = d_Delta_boc[j]; + exp_tor2_ij = exp( -p_tor2 * BOA_ij ); + exp_cot2_ij = exp( -p_cot2 * SQR(BOA_ij - 1.5) ); + exp_tor3_DiDj = exp( -p_tor3 * (Delta_i + Delta_j) ); + exp_tor4_DiDj = exp( p_tor4 * (Delta_i + Delta_j) ); + exp_tor34_inv = 1.0 / (1.0 + exp_tor3_DiDj + exp_tor4_DiDj); + f11_DiDj = (2.0 + exp_tor3_DiDj) * exp_tor34_inv; + + const int l_start = d_bo_first[j]; + const int l_end = l_start + d_bo_num[j]; + + 
for(int k = 0; k < 3; k++) fjtmp[k] = 0.0; + F_FLOAT CdDelta_j = 0.0; + + unsigned char nnz_kk; + + unsigned char selected_kk[1]; + #ifdef HIP_OPT_TORSION_PREVIEW + unsigned int kk_current = kk_start; + #else + unsigned int kk_current = j_start; + #endif + + #ifdef HIP_OPT_TORSION_PREVIEW + while (kk_current < kk_stop) { + #else + while (kk_current < j_end) { + #endif + nnz_kk=0; + while (nnz_kk < BLK_SZ) { + int kk = kk_current; + int k = d_bo_list[kk]; + k &= NEIGHMASK; + bool FLAG_CONTINUE = false; + + if (k == j) + FLAG_CONTINUE = true; + else{ + const int k_index = kk - j_start; + bo_ik = d_BO(i,k_index); + if (bo_ik < thb_cut) FLAG_CONTINUE = true; + } + + if (FLAG_CONTINUE == false){ + #ifdef HIP_OPT_TORSION_PREVIEW + selected_kk[nnz_kk] = kk_current-kk_start; + #else + selected_kk[nnz_kk] = kk_current-j_start; + #endif + nnz_kk++; + } + kk_current++; + #ifdef HIP_OPT_TORSION_PREVIEW + if (kk_current == kk_stop) break; + #else + if (kk_current == j_end) break; + #endif + } + + for (int kk_inner = 0; kk_inner < nnz_kk; kk_inner++){ + #ifdef HIP_OPT_TORSION_PREVIEW + const int kk = kk_start + selected_kk[kk_inner]; + #else + const int kk = j_start + selected_kk[kk_inner]; + #endif + int k = d_bo_list[kk]; + k &= NEIGHMASK; + const int ktype = type(k); + const int k_index = kk - j_start; + bo_ik = d_BO(i,k_index); + + + BOA_ik = bo_ik - thb_cut; + for (int d = 0; d < 3; d ++) delik[d] = x(k,d) - x(i,d); + const F_FLOAT rsqik = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2]; + const F_FLOAT rik = sqrt(rsqik); + + cos_ijk = (delij[0]*delik[0]+delij[1]*delik[1]+delij[2]*delik[2])/(rij*rik); + if (cos_ijk > 1.0) cos_ijk = 1.0; + else if (cos_ijk < -1.0) cos_ijk = -1.0; //LG changed "if" to "else if" + theta_ijk = acos(cos_ijk); + + // dcos_ijk + const F_FLOAT inv_dists = 1.0 / (rij * rik); + const F_FLOAT cos_ijk_tmp = cos_ijk *inv_dists * inv_dists; + + for( int d = 0; d < 3; d++ ) { + dcos_ijk_di[d] = -(delik[d] + delij[d]) * inv_dists + cos_ijk_tmp * 
(rsqik * delij[d] + rsqij * delik[d]); + dcos_ijk_dj[d] = delik[d] * inv_dists - cos_ijk_tmp * rsqik * delij[d]; + dcos_ijk_dk[d] = delij[d] * inv_dists - cos_ijk_tmp * rsqij * delik[d]; + } + + sin_ijk = sin( theta_ijk ); + if (sin_ijk >= 0 && sin_ijk <= 1e-10) + tan_ijk_i = cos_ijk / 1e-10; + else if( sin_ijk <= 0 && sin_ijk >= -1e-10 ) + tan_ijk_i = -cos_ijk / 1e-10; + else tan_ijk_i = cos_ijk / sin_ijk; + + exp_tor2_ik = exp( -p_tor2 * BOA_ik ); + exp_cot2_ik = exp( -p_cot2 * SQR(BOA_ik -1.5) ); + + for(int l = 0; l < 3; l++) fktmp[l] = 0.0; + + for (int ll = l_start; ll < l_end; ll++) { + int l = d_bo_list[ll]; + l &= NEIGHMASK; + if (l == i) continue; + const int ltype = type(l); + const int l_index = ll - l_start; + + bo_jl = d_BO(j,l_index); + if (l == k || bo_jl < thb_cut || bo_ij*bo_ik*bo_jl < thb_cut) continue; + + for (int d = 0; d < 3; d ++) deljl[d] = x(l,d) - x(j,d); + const F_FLOAT rsqjl = deljl[0]*deljl[0] + deljl[1]*deljl[1] + deljl[2]*deljl[2]; + const F_FLOAT rjl = sqrt(rsqjl); + BOA_jl = bo_jl - thb_cut; + + cos_jil = -(delij[0]*deljl[0]+delij[1]*deljl[1]+delij[2]*deljl[2])/(rij*rjl); + if (cos_jil > 1.0) cos_jil = 1.0; + else if (cos_jil < -1.0) cos_jil = -1.0; //LG changed "if" to "else if" + theta_jil = acos(cos_jil); + + // dcos_jil + const F_FLOAT inv_distjl = 1.0 / (rij * rjl); + const F_FLOAT cos_jil_tmp = cos_jil / ((rij*rjl)*(rij*rjl)); + + for( int d = 0; d < 3; d++ ) { + dcos_jil_di[d] = deljl[d] * inv_distjl - cos_jil_tmp * rsqjl * -delij[d]; + dcos_jil_dj[d] = (-deljl[d] + delij[d]) * inv_distjl - cos_jil_tmp * (rsqjl * delij[d] + rsqij * -deljl[d]); + dcos_jil_dk[d] = -delij[d] * inv_distjl - cos_jil_tmp * rsqij * deljl[d]; + } + + sin_jil = sin( theta_jil ); + if (sin_jil >= 0 && sin_jil <= 1e-10) + tan_jil_i = cos_jil / 1e-10; + else if( sin_jil <= 0 && sin_jil >= -1e-10 ) + tan_jil_i = -cos_jil / 1e-10; + else tan_jil_i = cos_jil / sin_jil; + + for (int d = 0; d < 3; d ++) dellk[d] = x(k,d) - x(l,d); + const F_FLOAT rsqlk = 
dellk[0]*dellk[0] + dellk[1]*dellk[1] + dellk[2]*dellk[2]; + const F_FLOAT rlk = sqrt(rsqlk); + + F_FLOAT unnorm_cos_omega, unnorm_sin_omega, omega; + F_FLOAT htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe; + F_FLOAT arg, poem, tel; + F_FLOAT cross_ij_jl[3]; + + // omega + + F_FLOAT dot_ij_jk = -(delij[0]*delik[0]+delij[1]*delik[1]+delij[2]*delik[2]); + F_FLOAT dot_ij_lj = delij[0]*deljl[0]+delij[1]*deljl[1]+delij[2]*deljl[2]; + F_FLOAT dot_ik_jl = delik[0]*deljl[0]+delik[1]*deljl[1]+delik[2]*deljl[2]; + unnorm_cos_omega = dot_ij_jk * dot_ij_lj + rsqij * dot_ik_jl; + + cross_ij_jl[0] = delij[1]*deljl[2] - delij[2]*deljl[1]; + cross_ij_jl[1] = delij[2]*deljl[0] - delij[0]*deljl[2]; + cross_ij_jl[2] = delij[0]*deljl[1] - delij[1]*deljl[0]; + + unnorm_sin_omega = -rij*(delik[0]*cross_ij_jl[0]+delik[1]*cross_ij_jl[1]+delik[2]*cross_ij_jl[2]); + omega = atan2( unnorm_sin_omega, unnorm_cos_omega ); + + htra = rik + cos_ijk * ( rjl * cos_jil - rij ); + htrb = rij - rik * cos_ijk - rjl * cos_jil; + htrc = rjl + cos_jil * ( rik * cos_ijk - rij ); + hthd = rik * sin_ijk * ( rij - rjl * cos_jil ); + hthe = rjl * sin_jil * ( rij - rik * cos_ijk ); + hnra = rjl * sin_ijk * sin_jil; + hnrc = rik * sin_ijk * sin_jil; + hnhd = rik * rjl * cos_ijk * sin_jil; + hnhe = rik * rjl * sin_ijk * cos_jil; + + poem = 2.0 * rik * rjl * sin_ijk * sin_jil; + if (poem < 1e-20) poem = 1e-20; + + tel = SQR(rik) + SQR(rij) + SQR(rjl) - SQR(rlk) - + 2.0 * (rik * rij * cos_ijk - rik * rjl * cos_ijk * cos_jil + rij * rjl * cos_jil); + + arg = tel / poem; + if (arg > 1.0) arg = 1.0; + else if (arg < -1.0) arg = -1.0; //LG changed from "if" to "else if" + + F_FLOAT sin_ijk_rnd = sin_ijk; + F_FLOAT sin_jil_rnd = sin_jil; + + if (sin_ijk >= 0 && sin_ijk <= 1e-10) sin_ijk_rnd = 1e-10; + else if( sin_ijk <= 0 && sin_ijk >= -1e-10 ) sin_ijk_rnd = -1e-10; + if (sin_jil >= 0 && sin_jil <= 1e-10) sin_jil_rnd = 1e-10; + else if( sin_jil <= 0 && sin_jil >= -1e-10 ) sin_jil_rnd = -1e-10; + + // 
dcos_omega_di + for (int d = 0; d < 3; d++) dcos_omega_dk[d] = ((htra-arg*hnra)/rik) * delik[d] - dellk[d]; + for (int d = 0; d < 3; d++) dcos_omega_dk[d] += (hthd-arg*hnhd)/sin_ijk_rnd * -dcos_ijk_dk[d]; + for (int d = 0; d < 3; d++) dcos_omega_dk[d] *= 2.0/poem; + + // dcos_omega_dj + for (int d = 0; d < 3; d++) dcos_omega_di[d] = -((htra-arg*hnra)/rik) * delik[d] - htrb/rij * delij[d]; + for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthd-arg*hnhd)/sin_ijk_rnd * dcos_ijk_di[d]; + for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthe-arg*hnhe)/sin_jil_rnd * dcos_jil_di[d]; + for (int d = 0; d < 3; d++) dcos_omega_di[d] *= 2.0/poem; + + // dcos_omega_dk + for (int d = 0; d < 3; d++) dcos_omega_dj[d] = -((htrc-arg*hnrc)/rjl) * deljl[d] + htrb/rij * delij[d]; + for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthd-arg*hnhd)/sin_ijk_rnd * dcos_ijk_dj[d]; + for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthe-arg*hnhe)/sin_jil_rnd * dcos_jil_dj[d]; + for (int d = 0; d < 3; d++) dcos_omega_dj[d] *= 2.0/poem; + + // dcos_omega_dl + for (int d = 0; d < 3; d++) dcos_omega_dl[d] = ((htrc-arg*hnrc)/rjl) * deljl[d] + dellk[d]; + for (int d = 0; d < 3; d++) dcos_omega_dl[d] += (hthe-arg*hnhe)/sin_jil_rnd * -dcos_jil_dk[d]; + for (int d = 0; d < 3; d++) dcos_omega_dl[d] *= 2.0/poem; + + cos_omega = cos( omega ); + cos2omega = cos( 2. * omega ); + cos3omega = cos( 3. 
* omega ); + + // torsion energy + + p_tor1 = paramsfbp(ktype,itype,jtype,ltype).p_tor1; + p_cot1 = paramsfbp(ktype,itype,jtype,ltype).p_cot1; + V1 = paramsfbp(ktype,itype,jtype,ltype).V1; + V2 = paramsfbp(ktype,itype,jtype,ltype).V2; + V3 = paramsfbp(ktype,itype,jtype,ltype).V3; + + exp_tor1 = exp(p_tor1 * SQR(2.0 - d_BO_pi(i,j_index) - f11_DiDj)); + exp_tor2_jl = exp(-p_tor2 * BOA_jl); + exp_cot2_jl = exp(-p_cot2 * SQR(BOA_jl - 1.5) ); + fn10 = (1.0 - exp_tor2_ik) * (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jl); + + CV = 0.5 * (V1 * (1.0 + cos_omega) + V2 * exp_tor1 * (1.0 - cos2omega) + V3 * (1.0 + cos3omega) ); + + e_tor = fn10 * sin_ijk * sin_jil * CV; + if (eflag) ev.ereax[6] += e_tor; + + dfn11 = (-p_tor3 * exp_tor3_DiDj + (p_tor3 * exp_tor3_DiDj - p_tor4 * exp_tor4_DiDj) * + (2.0 + exp_tor3_DiDj) * exp_tor34_inv) * exp_tor34_inv; + + CEtors1 = sin_ijk * sin_jil * CV; + + CEtors2 = -fn10 * 2.0 * p_tor1 * V2 * exp_tor1 * (2.0 - d_BO_pi(i,j_index) - f11_DiDj) * + (1.0 - SQR(cos_omega)) * sin_ijk * sin_jil; + CEtors3 = CEtors2 * dfn11; + + CEtors4 = CEtors1 * p_tor2 * exp_tor2_ik * (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jl); + CEtors5 = CEtors1 * p_tor2 * (1.0 - exp_tor2_ik) * exp_tor2_ij * (1.0 - exp_tor2_jl); + CEtors6 = CEtors1 * p_tor2 * (1.0 - exp_tor2_ik) * (1.0 - exp_tor2_ij) * exp_tor2_jl; + + cmn = -fn10 * CV; + CEtors7 = cmn * sin_jil * tan_ijk_i; + CEtors8 = cmn * sin_ijk * tan_jil_i; + + CEtors9 = fn10 * sin_ijk * sin_jil * + (0.5 * V1 - 2.0 * V2 * exp_tor1 * cos_omega + 1.5 * V3 * (cos2omega + 2.0 * SQR(cos_omega))); + + // 4-body conjugation energy + + fn12 = exp_cot2_ik * exp_cot2_ij * exp_cot2_jl; + e_con = p_cot1 * fn12 * (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jil); + if (eflag) ev.ereax[7] += e_con; + + Cconj = -2.0 * fn12 * p_cot1 * p_cot2 * (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jil); + + CEconj1 = Cconj * (BOA_ik - 1.5e0); + CEconj2 = Cconj * (BOA_ij - 1.5e0); + CEconj3 = Cconj * (BOA_jl - 1.5e0); + + CEconj4 = -p_cot1 * fn12 * 
(SQR(cos_omega) - 1.0) * sin_jil * tan_ijk_i; + CEconj5 = -p_cot1 * fn12 * (SQR(cos_omega) - 1.0) * sin_ijk * tan_jil_i; + CEconj6 = 2.0 * p_cot1 * fn12 * cos_omega * sin_ijk * sin_jil; + + // forces + + // contribution to bond order + + d_Cdbopi(i,j_index) += CEtors2; + CdDelta_i += CEtors3; + CdDelta_j += CEtors3; + + a_Cdbo(i,k_index) += CEtors4 + CEconj1; + a_Cdbo(i,j_index) += CEtors5 + CEconj2; + a_Cdbo(j,l_index) += CEtors6 + CEconj3; // trouble + + // dcos_theta_ijk + const F_FLOAT coeff74 = CEtors7 + CEconj4; + for (int d = 0; d < 3; d++) fi_tmp[d] = (coeff74) * dcos_ijk_di[d]; + for (int d = 0; d < 3; d++) fj_tmp[d] = (coeff74) * dcos_ijk_dj[d]; + for (int d = 0; d < 3; d++) fk_tmp[d] = (coeff74) * dcos_ijk_dk[d]; + + const F_FLOAT coeff85 = CEtors8 + CEconj5; + // dcos_theta_jil + for (int d = 0; d < 3; d++) fi_tmp[d] += (coeff85) * dcos_jil_di[d]; + for (int d = 0; d < 3; d++) fj_tmp[d] += (coeff85) * dcos_jil_dj[d]; + for (int d = 0; d < 3; d++) fl_tmp[d] = (coeff85) * dcos_jil_dk[d]; + + // dcos_omega + const F_FLOAT coeff96 = CEtors9 + CEconj6; + for (int d = 0; d < 3; d++) fi_tmp[d] += (coeff96) * dcos_omega_di[d]; + for (int d = 0; d < 3; d++) fj_tmp[d] += (coeff96) * dcos_omega_dj[d]; + for (int d = 0; d < 3; d++) fk_tmp[d] += (coeff96) * dcos_omega_dk[d]; + for (int d = 0; d < 3; d++) fl_tmp[d] += (coeff96) * dcos_omega_dl[d]; + + // total forces + + for (int d = 0; d < 3; d++) fitmp[d] -= fi_tmp[d]; + for (int d = 0; d < 3; d++) fjtmp[d] -= fj_tmp[d]; + for (int d = 0; d < 3; d++) fktmp[d] -= fk_tmp[d]; + for (int d = 0; d < 3; d++) a_f(l,d) -= fl_tmp[d]; + + // per-atom energy/virial tally + + if (EVFLAG) { + eng_tmp = e_tor + e_con; + //if (eflag_atom) this->template ev_tally(ev,i,j,eng_tmp,0.0,0.0,0.0,0.0); + if (eflag_atom) this->template e_tally(ev,i,j,eng_tmp); + if (vflag_either) { + for (int d = 0; d < 3; d ++) delil[d] = x(l,d) - x(i,d); + for (int d = 0; d < 3; d ++) delkl[d] = x(l,d) - x(k,d); + this->template 
v_tally4(ev,k,i,j,l,fk_tmp,fi_tmp,fj_tmp,delkl,delil,deljl); + } + } + + } + for (int d = 0; d < 3; d++) a_f(k,d) += fktmp[d]; + } + } + a_CdDelta[j] += CdDelta_j; + for (int d = 0; d < 3; d++) a_f(j,d) += fjtmp[d]; + } + } + a_CdDelta[i] += CdDelta_i; + for (int d = 0; d < 3; d++) a_f(i,d) += fitmp[d]; +} + template template KOKKOS_INLINE_FUNCTION @@ -3561,6 +4047,14 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsiontemplate operator()(PairReaxComputeTorsion(), ii, ev); } +template +template +KOKKOS_INLINE_FUNCTION +void PairReaxCKokkos::operator()(PairReaxComputeTorsion_with_BLOCKING, const int &ii) const { + + EV_FLOAT_REAX ev; + this->template operator()(PairReaxComputeTorsion_with_BLOCKING(), ii, ev); +} /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_reaxc_kokkos.h b/src/KOKKOS/pair_reaxc_kokkos.h index 42a55224c9..5d80d5d41a 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.h +++ b/src/KOKKOS/pair_reaxc_kokkos.h @@ -102,6 +102,9 @@ struct PairReaxComputeTorsion_preview{}; template struct PairReaxComputeTorsion{}; +template +struct PairReaxComputeTorsion_with_BLOCKING{}; + template struct PairReaxComputeHydrogen{}; @@ -225,6 +228,10 @@ class PairReaxCKokkos : public PairReaxC { KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion, const int&, EV_FLOAT_REAX&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(PairReaxComputeTorsion_with_BLOCKING, const int&, EV_FLOAT_REAX&) const; + KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion_preview, const int&) const; @@ -232,6 +239,10 @@ class PairReaxCKokkos : public PairReaxC { KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeTorsion, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(PairReaxComputeTorsion_with_BLOCKING, const int&) const; + template KOKKOS_INLINE_FUNCTION void operator()(PairReaxComputeHydrogen, const int&, EV_FLOAT_REAX&) const; From 926a037cba4845509d3ad794f8ea4aaeba452a23 Mon Sep 
17 00:00:00 2001 From: Leopold Grinberg Date: Sat, 24 Oct 2020 15:06:14 -0500 Subject: [PATCH 0050/1471] thread group dimension adjustment for FixQEqReaxKokkosComputeHFunctor Change-Id: I7a62632a2403cdc3491777b090a1cb48d3a7591b --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index e0173e9bc5..4426edb41d 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -236,8 +236,16 @@ void FixQEqReaxKokkos::pre_force(int /*vflag*/) } else { // GPU, use teams Kokkos::deep_copy(d_mfill_offset,0); - int vector_length = 32; +#ifdef KOKKOS_ENABLE_CUDA int atoms_per_team = 4; + int vector_length = 32;//LG changed 32 to 64 +#endif +#ifdef KOKKOS_ENABLE_HIP + int atoms_per_team = 64; + int vector_length = 8;//LG changed 32 to 64 +#endif + + int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0); Kokkos::TeamPolicy policy(num_teams, atoms_per_team, @@ -805,11 +813,12 @@ void FixQEqReaxKokkos::cg_solve1() if (need_dup) Kokkos::Experimental::contribute(d_o, dup_o); } else { -#ifndef HIP_OPT_SPMV - Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); -#else + +#ifdef HIP_OPT_SPMV int teamsize = 1024/64; Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, 64), *this); +#else + Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #endif } @@ -952,8 +961,9 @@ void FixQEqReaxKokkos::cg_solve2() #ifndef HIP_OPT_SPMV Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #else - int teamsize = 1024/64; //LG need to use Kokkos functionality to get max warp size - Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, 64), *this); + int vector_length=64; + int teamsize = 1024/vector_length; //LG need to use Kokkos functionality to get max warp size + Kokkos::parallel_for(Kokkos::TeamPolicy 
((inum+teamsize-1)/teamsize, teamsize, vector_length), *this); #endif } From dff61617fe8ef3a1ebcc3d1a1c6b4a0e836362fd Mon Sep 17 00:00:00 2001 From: Sriranjani Sitaraman Date: Tue, 27 Oct 2020 17:06:56 -0500 Subject: [PATCH 0051/1471] Change LaunchBounds to 64 for TagPairTersoffComputeHalf Change-Id: I7fb8871c30acab4fc98654df618af6e786bc6244 --- src/KOKKOS/pair_tersoff_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index adfd46c6e8..b3c4d0cbd6 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -226,15 +226,15 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); ev_all += ev; } else if (neighflag == FULL) { if (evflag) From 844dea3e1be72a1277cd64661dbae9da60850217 Mon Sep 17 00:00:00 2001 From: Leopold Grinberg Date: Wed, 28 Oct 2020 13:17:52 -0500 Subject: [PATCH 0052/1471] temporary fix for EAM Change-Id: Ia39bf1b4c3adcbadf0b1bde39d6817f44869946b --- src/KOKKOS/pair_eam_kokkos.cpp | 76 +++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 33 
deletions(-) diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 39869e4d61..4b6d253a77 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -180,7 +180,7 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) // compute kernel AB if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy,Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy,Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy,Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } @@ -201,21 +201,21 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy, 
Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); } } } else { @@ -227,7 +227,7 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } else { Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); } @@ -638,17 +638,20 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int // loop over neighbors of my atoms -#ifdef KOKKOS_ENABLE_HIP -#ifdef __HIP_DEVICE_COMPILE__ - __shared__ F_FLOAT A[500][7]; +#if defined(HIP_OPT_EAM_SHARED) && (defined(__HIP_DEVICE_COMPILE__) || defined(__CUDA_ARCH__)) + __shared__ F_FLOAT A[500][7]; + const int m_max = d_rhor_spline.extent_int(1); + const int d_rhor_spline_cached = m_max > 500 ? 0 : 1; - for (int i = threadIdx.y; i < 500*7; i+=blockDim.y){ - int j = i%7; - int m = i/7; - A[m][j] = d_rhor_spline(0,m,j); - } + if (d_rhor_spline_cached){ + for (int i = threadIdx.y; i < m_max*7; i+=blockDim.y){ + int j = i%7; + int m = i/7; + if ( d_rhor_spline(0,m,j) < -55.65 ) printf("A: m=%d, j=%d, i = %d\n",m,j,i); + A[m][j] = d_rhor_spline(0,m,j); + } __syncthreads(); -#endif + } #endif @@ -678,18 +681,18 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int p -= m; p = MIN(p,1.0); const int d_type2rhor_ji = d_type2rhor(jtype,itype); - #ifdef __HIP_DEVICE_COMPILE__ - if (d_type2rhor_ji == 0){ + #if defined(HIP_OPT_EAM_SHARED) && (defined(__HIP_DEVICE_COMPILE__) || defined(__CUDA_ARCH__)) + if (d_type2rhor_ji == 0 && d_rhor_spline_cached == 1){ rhotmp += ( (A[m][3]*p + A[m][4])*p + A[m][5] )*p + A[m][6]; } else rhotmp += ( (d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); - #else + #else rhotmp += ( (d_rhor_spline(d_type2rhor_ji,m,3)*p + 
d_rhor_spline(d_type2rhor_ji,m,4))*p + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); - #endif + #endif } } @@ -736,18 +739,24 @@ void PairEAMKokkos::operator()(g, cons // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial -#ifdef KOKKOS_ENABLE_HIP -#ifdef __HIP_DEVICE_COMPILE__ - __shared__ F_FLOAT A[500][7]; //LG assuming second dimension of d_z2r_spline is 500; - //need to recode to "nr+1" - for (int i = threadIdx.y; i < 500*7; i+=blockDim.y){ +//#ifdef KOKKOS_ENABLE_HIP +//#ifdef __HIP_DEVICE_COMPILE__ +#if defined(HIP_OPT_EAM_SHARED) && (defined(__HIP_DEVICE_COMPILE__) || defined(__CUDA_ARCH__)) + + __shared__ F_FLOAT A[500][7]; //LG assuming second dimension of d_z2r_spline is 500 or less; + //if more then shared memory is not used + + const int m_max = d_z2r_spline.extent_int(1); + const int d_z2r_spline_cached = m_max > 500 ? 0 : 1; + + for (int i = threadIdx.y; i < m_max*7; i+=blockDim.y){ int j = i%7; int m = i/7; - A[m][j] = d_z2r_spline(0,m,j); + if ( d_z2r_spline(0,m,j) < -99955.65 ) printf("C: m=%d, j=%d, i = %d\n",m,j,i); + A[m][j] = d_z2r_spline(0,m,j); } __syncthreads(); #endif -#endif auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); @@ -805,11 +814,12 @@ void PairEAMKokkos::operator()(g, cons const int d_type2z2r_ij = d_type2z2r(itype,jtype); - #ifdef __HIP_DEVICE_COMPILE__ - const auto z2r_spline_3 = (0 == d_type2z2r_ij) ? A[m][3] : d_z2r_spline(d_type2z2r_ij,m,3); - const auto z2r_spline_4 = (0 == d_type2z2r_ij) ? A[m][4] : d_z2r_spline(d_type2z2r_ij,m,4); - const auto z2r_spline_5 = (0 == d_type2z2r_ij) ? A[m][5] : d_z2r_spline(d_type2z2r_ij,m,5); - const auto z2r_spline_6 = (0 == d_type2z2r_ij) ? 
A[m][6] : d_z2r_spline(d_type2z2r_ij,m,6); + #if defined(HIP_OPT_EAM_SHARED) && (defined(__HIP_DEVICE_COMPILE__) || defined(__CUDA_ARCH__)) + const auto have_cache = (d_z2r_spline_cached == 1) && (0 == d_type2z2r_ij) + const auto z2r_spline_3 = (have_cache) ? A[m][3] : d_z2r_spline(d_type2z2r_ij,m,3); + const auto z2r_spline_4 = (have_cache) ? A[m][4] : d_z2r_spline(d_type2z2r_ij,m,4); + const auto z2r_spline_5 = (have_cache) ? A[m][5] : d_z2r_spline(d_type2z2r_ij,m,5); + const auto z2r_spline_6 = (have_cache) ? A[m][6] : d_z2r_spline(d_type2z2r_ij,m,6); #else const auto z2r_spline_3 = d_z2r_spline(d_type2z2r_ij,m,3); const auto z2r_spline_4 = d_z2r_spline(d_type2z2r_ij,m,4); From 18466fb05b929692834a5c7b2e6f469a024761de Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Tue, 17 Nov 2020 09:31:09 -0600 Subject: [PATCH 0053/1471] legal stuff Change-Id: I5ab375285fdea9b20d8174a582aca9e78bf0f8a3 --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 18 ++++++++++++++++++ src/KOKKOS/kokkos_base.h | 18 ++++++++++++++++++ src/KSPACE/kissfft.h | 18 ++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 4426edb41d..40ee4e1472 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -16,6 +16,24 @@ Kamesh Arumugam (NVIDIA) ------------------------------------------------------------------------- */ +/* +Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + #include "fix_qeq_reax_kokkos.h" #include "atom.h" diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index 855ccf9108..95dc4f5608 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -11,6 +11,24 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* +Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + #ifndef KOKKOS_BASE_H #define KOKKOS_BASE_H diff --git a/src/KSPACE/kissfft.h b/src/KSPACE/kissfft.h index 8cb6dec820..a5db8ea13d 100644 --- a/src/KSPACE/kissfft.h +++ b/src/KSPACE/kissfft.h @@ -6,6 +6,24 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* +Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + /* we use a stripped down KISS FFT as default FFT for LAMMPS this code is adapted from kiss_fft_v1_2_9 From f629de84f9c3cba0933fddc31d0a20a36dfebbdb Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Wed, 18 Nov 2020 15:16:23 -0600 Subject: [PATCH 0054/1471] cleanup printf workarounds Change-Id: Id9960f545fa0159d6c38392711b6ac584715d3d9 --- src/KOKKOS/pair_eam_kokkos.cpp | 2 ++ src/KOKKOS/pair_reaxc_kokkos.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 4b6d253a77..e45bb59346 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -647,7 +647,9 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int for (int i = threadIdx.y; i < m_max*7; i+=blockDim.y){ int j = i%7; int m = i/7; +#ifdef HIP_PRINTF_WORKAROUND if ( d_rhor_spline(0,m,j) < -55.65 ) printf("A: m=%d, j=%d, i = %d\n",m,j,i); +#endif A[m][j] = d_rhor_spline(0,m,j); } __syncthreads(); diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 2a39c1a287..9c96781c7d 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -3545,7 +3545,7 @@ void PairReaxCKokkos::operator()(PairReaxComputeTorsion Date: Wed, 14 Apr 2021 16:50:37 -0500 
Subject: [PATCH 0055/1471] hide additional printf Change-Id: I200c85013b9d0163847194ad4b18ac33af65a823 --- src/KOKKOS/pair_eam_kokkos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index e45bb59346..595df389a2 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -754,7 +754,9 @@ void PairEAMKokkos::operator()(g, cons for (int i = threadIdx.y; i < m_max*7; i+=blockDim.y){ int j = i%7; int m = i/7; +#ifdef HIP_PRINTF_WORKAROUND if ( d_z2r_spline(0,m,j) < -99955.65 ) printf("C: m=%d, j=%d, i = %d\n",m,j,i); +#endif A[m][j] = d_z2r_spline(0,m,j); } __syncthreads(); From fc2eff64bb5980b9ae0ceda82e8780d00ea3032f Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Tue, 30 Mar 2021 17:04:06 -0500 Subject: [PATCH 0056/1471] remove hard coded launchbounds / teamsizes for compat. w/ Kokkos LB=256 default Change-Id: I7dcd26eee32b3fc049549ae380ffaf85423eed2e --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 35 ++++++++++------ src/KOKKOS/pair_eam_kokkos.cpp | 28 ++++++------- src/KOKKOS/pair_reaxc_kokkos.cpp | 64 +++++++++++++++--------------- src/KOKKOS/pair_tersoff_kokkos.cpp | 16 ++++---- 4 files changed, 77 insertions(+), 66 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 40ee4e1472..968a87a3b8 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -259,8 +259,8 @@ void FixQEqReaxKokkos::pre_force(int /*vflag*/) int vector_length = 32;//LG changed 32 to 64 #endif #ifdef KOKKOS_ENABLE_HIP - int atoms_per_team = 64; - int vector_length = 8;//LG changed 32 to 64 + int atoms_per_team = 4; + int vector_length = 64; #endif @@ -759,7 +759,14 @@ void FixQEqReaxKokkos::cg_solve1() int teamsize; if (execution_space == Host) teamsize = 1; - else teamsize = 128; + else { + #ifdef KOKKOS_ENABLE_HIP + teamsize = 4; + #else + teamsize = 128; + #endif + } + // sparse_matvec( &H, x, q ); 
FixQEqReaxKokkosSparse12Functor sparse12_functor(this); @@ -833,8 +840,8 @@ void FixQEqReaxKokkos::cg_solve1() } else { #ifdef HIP_OPT_SPMV - int teamsize = 1024/64; - Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, 64), *this); + using team_policy = Kokkos::TeamPolicy ; + Kokkos::parallel_for(team_policy((inum+teamsize-1)/teamsize, teamsize, team_policy::vector_length_max()), *this); #else Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #endif @@ -897,7 +904,13 @@ void FixQEqReaxKokkos::cg_solve2() int teamsize; if (execution_space == Host) teamsize = 1; - else teamsize = 64; + else { + #ifdef KOKKOS_ENABLE_HIP + teamsize = 4; + #else + teamsize = 64; + #endif + } // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse32Functor sparse32_functor(this); @@ -917,9 +930,8 @@ void FixQEqReaxKokkos::cg_solve2() Kokkos::Experimental::contribute(d_o, dup_o); } else { #ifdef HIP_OPT_SPMV - int vector_length = 64; - int teamsize = 1024/vector_length;//LG need to use some Kokkos function to get WARP_SIZE (max vector size) - Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, vector_length), *this); + using team_policy = Kokkos::TeamPolicy ; + Kokkos::parallel_for(team_policy((inum+teamsize-1)/teamsize, teamsize, team_policy::vector_length_max()), *this); #else Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #endif @@ -979,9 +991,8 @@ void FixQEqReaxKokkos::cg_solve2() #ifndef HIP_OPT_SPMV Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #else - int vector_length=64; - int teamsize = 1024/vector_length; //LG need to use Kokkos functionality to get max warp size - Kokkos::parallel_for(Kokkos::TeamPolicy ((inum+teamsize-1)/teamsize, teamsize, vector_length), *this); + using team_policy = Kokkos::TeamPolicy ; + Kokkos::parallel_for(team_policy((inum+teamsize-1)/teamsize, teamsize, team_policy::vector_length_max()), *this); #endif } diff --git 
a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 595df389a2..0fc1713b57 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -180,9 +180,9 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) // compute kernel AB if (eflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy,Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy,Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } if (eflag) { @@ -201,41 +201,41 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) if (evflag) { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); } else { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); } } } else { if (neighflag == HALF) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 
1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } else if (neighflag == HALFTHREAD) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } else if (neighflag == FULL) { if (newton_pair) { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else { - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<1024, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } } diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 9c96781c7d..b6a2adacdd 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -804,14 +804,14 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) } else { if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<128, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } } 
ev_all += ev; @@ -850,12 +850,12 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) k_resize_hb.sync(); // zero - Kokkos::parallel_for(Kokkos::RangePolicy>(0,nmax),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); if (neighflag == HALF) - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); else if (neighflag == HALFTHREAD) - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); k_resize_bo.modify(); k_resize_bo.sync(); @@ -943,15 +943,15 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) // Angular if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); ev_all += ev; } pvector[4] = ev.ereax[3]; @@ -993,7 +993,7 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) #endif } - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum),*this); Kokkos::fence(); int nnz = 0; for (int i = 0; i < inum; ++i){ @@ -1005,30 +1005,30 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) #ifdef HIP_OPT_TORSION_PREVIEW_BLOCKING if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, 
Kokkos::LaunchBounds<256, 1> >(0,nnz),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,nnz),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,nnz),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nnz),*this); ev_all += ev; } else { if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,nnz),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,nnz),*this,ev); } else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,nnz),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nnz),*this); } ev_all += ev; } #else if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,nnz),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nnz),*this); ev_all += ev; } else { if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,nnz),*this,ev); } else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,nnz),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,nnz),*this); } ev_all += ev; } @@ -1040,15 +1040,15 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) // Torsion if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); ev_all += ev; } else { if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + 
Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); } else{ - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } ev_all += ev; } @@ -1062,15 +1062,15 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) if (cut_hbsq > 0.0) { if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); ev_all += ev; } } @@ -1102,9 +1102,9 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) //} if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,ignum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); ev_all += ev; pvector[0] += ev.evdwl; } else { //if (neighflag == HALFTHREAD) { @@ -1118,9 +1118,9 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) //} if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,ignum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); ev_all += ev; pvector[0] += ev.evdwl; } diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp 
b/src/KOKKOS/pair_tersoff_kokkos.cpp index b3c4d0cbd6..c14cd389d2 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -226,27 +226,27 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<64, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); ev_all += ev; } else if (neighflag == FULL) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); ev_all += ev; if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy, Kokkos::LaunchBounds<256, 1> >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); ev_all += ev; } From ad1c1637d44528ddda953fb2c69564ac94c767ca Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Wed, 28 Apr 2021 10:34:39 -0500 Subject: [PATCH 0057/1471] cleanup & the various vector-wise SPMVs Change-Id: Id29ec4e1968ddfccf26335a0f41a8652db33b89b --- 
src/KOKKOS/fix_qeq_reax_kokkos.cpp | 85 +++++++++++++++++++++--------- src/KOKKOS/fix_qeq_reax_kokkos.h | 15 ++++++ 2 files changed, 76 insertions(+), 24 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 968a87a3b8..6c30c21544 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -758,12 +758,22 @@ void FixQEqReaxKokkos::cg_solve1() F_FLOAT tmp, sig_old, b_norm; int teamsize; - if (execution_space == Host) teamsize = 1; + int vectorsize; + int leaguesize; + if (execution_space == Host) { + teamsize = 1; + vectorsize = 1; + leaguesize = inum; + } else { - #ifdef KOKKOS_ENABLE_HIP - teamsize = 4; + #ifdef HIP_OPT_SPMV + teamsize = 16; + vectorsize = 64; + leaguesize = (inum + teamsize - 1) / (teamsize); #else teamsize = 128; + vectorsize = 1; + leaguesize = inum; #endif } @@ -783,7 +793,11 @@ void FixQEqReaxKokkos::cg_solve1() if (need_dup) Kokkos::Experimental::contribute(d_o, dup_o); } else { + #ifdef HIP_OPT_SPMV + Kokkos::parallel_for(Kokkos::TeamPolicy (leaguesize, teamsize, vectorsize), *this); + #else Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); + #endif } if (neighflag != FULL) { @@ -840,8 +854,7 @@ void FixQEqReaxKokkos::cg_solve1() } else { #ifdef HIP_OPT_SPMV - using team_policy = Kokkos::TeamPolicy ; - Kokkos::parallel_for(team_policy((inum+teamsize-1)/teamsize, teamsize, team_policy::vector_length_max()), *this); + Kokkos::parallel_for(Kokkos::TeamPolicy (leaguesize, teamsize, vectorsize), *this); #else Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #endif @@ -903,13 +916,23 @@ void FixQEqReaxKokkos::cg_solve2() F_FLOAT tmp, sig_old, b_norm; int teamsize; - if (execution_space == Host) teamsize = 1; + int vectorsize; + int leaguesize; + if (execution_space == Host) { + teamsize = 1; + vectorsize = 1; + leaguesize = inum; + } else { - #ifdef KOKKOS_ENABLE_HIP - teamsize = 4; - #else + #ifdef HIP_OPT_SPMV + teamsize = 16; + 
vectorsize = 64; + leaguesize = (inum + teamsize - 1) / (teamsize); + #else teamsize = 64; - #endif + vectorsize = 1; + leaguesize = inum; + #endif } // sparse_matvec( &H, x, q ); @@ -930,8 +953,7 @@ void FixQEqReaxKokkos::cg_solve2() Kokkos::Experimental::contribute(d_o, dup_o); } else { #ifdef HIP_OPT_SPMV - using team_policy = Kokkos::TeamPolicy ; - Kokkos::parallel_for(team_policy((inum+teamsize-1)/teamsize, teamsize, team_policy::vector_length_max()), *this); + Kokkos::parallel_for(Kokkos::TeamPolicy (leaguesize, teamsize, vectorsize), *this); #else Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #endif @@ -988,11 +1010,10 @@ void FixQEqReaxKokkos::cg_solve2() if (need_dup) Kokkos::Experimental::contribute(d_o, dup_o); } else { -#ifndef HIP_OPT_SPMV - Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); +#ifdef HIP_OPT_SPMV + Kokkos::parallel_for(Kokkos::TeamPolicy (leaguesize, teamsize, vectorsize), *this); #else - using team_policy = Kokkos::TeamPolicy ; - Kokkos::parallel_for(team_policy((inum+teamsize-1)/teamsize, teamsize, team_policy::vector_length_max()), *this); + Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); #endif } @@ -1130,6 +1151,22 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec1, const membertyp } } +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxKokkos::operator() (TagSparseMatvec1Vector, const membertype1vec &team) const +{ + int k = team.league_rank () * team.team_size () + team.team_rank (); + const int i = d_ilist[k]; + if (mask[i] & groupbit) { + F_FLOAT doitmp; + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) { + const int j = d_jlist(jj); + doi += d_val(jj) * d_s[j]; + }, doitmp); + Kokkos::single(Kokkos::PerThread(team), [&] () {d_o[i] += doitmp; }); + } +} + /* ---------------------------------------------------------------------- */ template @@ -1168,10 +1205,10 @@ void 
FixQEqReaxKokkos::sparse23_item(int ii) const /* ---------------------------------------------------------------------- */ -#ifdef HIP_OPT_SPMV + template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertype2 &team) const +void FixQEqReaxKokkos::operator() (TagSparseMatvec2Vector, const membertype2vec &team) const { int k = team.league_rank () * team.team_size () + team.team_rank (); const int i = d_ilist[k]; @@ -1184,7 +1221,7 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertyp Kokkos::single(Kokkos::PerThread(team), [&] () {d_o[i] += doitmp; }); } } -#else + template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertype2 &team) const @@ -1199,7 +1236,7 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertyp Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp; }); } } -#endif + template KOKKOS_INLINE_FUNCTION @@ -1245,10 +1282,10 @@ void FixQEqReaxKokkos::sparse33_item(int ii) const } /* ---------------------------------------------------------------------- */ -#ifdef HIP_OPT_SPMV + template KOKKOS_INLINE_FUNCTION -void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertype3 &team) const +void FixQEqReaxKokkos::operator() (TagSparseMatvec3Vector, const membertype3vec &team) const { int k = team.league_rank () * team.team_size () + team.team_rank (); const int i = d_ilist[k]; @@ -1261,7 +1298,7 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertyp Kokkos::single(Kokkos::PerThread(team), [&] () {d_o[i] += doitmp;}); } } -#else + template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertype3 &team) const @@ -1276,7 +1313,7 @@ void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertyp Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp;}); } } -#endif + /* ---------------------------------------------------------------------- */ template diff --git 
a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index ec68990ecd..f89d2ddfd3 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -32,8 +32,11 @@ FixStyle(qeq/reax/kk/host,FixQEqReaxKokkos) namespace LAMMPS_NS { struct TagSparseMatvec1 {}; +struct TagSparseMatvec1Vector {}; struct TagSparseMatvec2 {}; +struct TagSparseMatvec2Vector {}; struct TagSparseMatvec3 {}; +struct TagSparseMatvec3Vector {}; struct TagZeroQGhosts{}; struct TagFixQEqReaxPackForwardComm {}; struct TagFixQEqReaxUnpackForwardComm {}; @@ -94,14 +97,26 @@ class FixQEqReaxKokkos : public FixQEqReax { KOKKOS_INLINE_FUNCTION void operator() (TagSparseMatvec1, const membertype1 &team) const; + typedef typename Kokkos::TeamPolicy ::member_type membertype1vec; + KOKKOS_INLINE_FUNCTION + void operator() (TagSparseMatvec1Vector, const membertype1vec &team) const; + typedef typename Kokkos::TeamPolicy ::member_type membertype2; KOKKOS_INLINE_FUNCTION void operator() (TagSparseMatvec2, const membertype2 &team) const; + typedef typename Kokkos::TeamPolicy ::member_type membertype2vec; + KOKKOS_INLINE_FUNCTION + void operator() (TagSparseMatvec2Vector, const membertype2vec &team) const; + typedef typename Kokkos::TeamPolicy ::member_type membertype3; KOKKOS_INLINE_FUNCTION void operator() (TagSparseMatvec3, const membertype3 &team) const; + typedef typename Kokkos::TeamPolicy ::member_type membertype3vec; + KOKKOS_INLINE_FUNCTION + void operator() (TagSparseMatvec3Vector, const membertype3vec &team) const; + KOKKOS_INLINE_FUNCTION void operator()(TagZeroQGhosts, const int&) const; From abcac33c6305658b04a8acc8885a4a2e9c6fdd62 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Fri, 14 May 2021 16:21:24 -0400 Subject: [PATCH 0058/1471] cleanup rebase errors Change-Id: Ib8e255fefcb17ff91bc0bfdfcb98e56d3387b9b2 --- src/KOKKOS/comm_kokkos.cpp | 2 -- src/KOKKOS/fix_qeq_reax_kokkos.h | 2 +- src/KOKKOS/pair_eam_kokkos.cpp | 4 ++-- 3 files changed, 3 
insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c79d5f5057..cd98261f83 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -474,8 +474,6 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) MPI_Request request; DAT::tdual_xfloat_1d k_buf_tmp; - MPI_Request request; - int nsize = pair->comm_forward; KokkosBase* pairKKBase = dynamic_cast(pair); diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index f89d2ddfd3..d191135a61 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -43,7 +43,7 @@ struct TagFixQEqReaxUnpackForwardComm {}; template -class FixQEqReaxKokkos : public FixQEqReax { +class FixQEqReaxKokkos : public FixQEqReax, public KokkosBase { public: typedef DeviceType device_type; typedef ArrayTypes AT; diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 0fc1713b57..bcc512d528 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -736,7 +736,7 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int template template KOKKOS_INLINE_FUNCTION -void PairEAMKokkos::operator()(g, const int &ii, EV_FLOAT& ev) const { +void PairEAMKokkos::operator()(TagPairEAMKernelC, const int &ii, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial @@ -819,7 +819,7 @@ void PairEAMKokkos::operator()(g, cons const int d_type2z2r_ij = d_type2z2r(itype,jtype); #if defined(HIP_OPT_EAM_SHARED) && (defined(__HIP_DEVICE_COMPILE__) || defined(__CUDA_ARCH__)) - const auto have_cache = (d_z2r_spline_cached == 1) && (0 == d_type2z2r_ij) + const auto have_cache = (d_z2r_spline_cached == 1) && (0 == d_type2z2r_ij); const auto z2r_spline_3 = (have_cache) ? A[m][3] : d_z2r_spline(d_type2z2r_ij,m,3); const auto z2r_spline_4 = (have_cache) ? A[m][4] : d_z2r_spline(d_type2z2r_ij,m,4); const auto z2r_spline_5 = (have_cache) ? 
A[m][5] : d_z2r_spline(d_type2z2r_ij,m,5); From f44f983b3b4645e35f598c5b1592ac570ca68374 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Mon, 17 May 2021 09:52:10 -0400 Subject: [PATCH 0059/1471] Restore the previous atom/teamsizes for ComputeHFunctor Change-Id: I292280efd3d5d0d6a7486c1e9798ef4f632d9df4 --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 6c30c21544..c6439d455f 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -256,11 +256,11 @@ void FixQEqReaxKokkos::pre_force(int /*vflag*/) #ifdef KOKKOS_ENABLE_CUDA int atoms_per_team = 4; - int vector_length = 32;//LG changed 32 to 64 + int vector_length = 32; #endif #ifdef KOKKOS_ENABLE_HIP - int atoms_per_team = 4; - int vector_length = 64; + int atoms_per_team = 64; + int vector_length = 8; #endif From 3f42e974055932cb51805a01ae278947bd52f681 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Mon, 17 May 2021 13:01:33 -0400 Subject: [PATCH 0060/1471] remove old printf workarounds Change-Id: I46a66538c886c9961977e0433e900bc91d92c4f2 --- src/KOKKOS/pair_eam_kokkos.cpp | 6 ------ src/KOKKOS/pair_reaxc_kokkos.cpp | 14 -------------- 2 files changed, 20 deletions(-) diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index bcc512d528..bdcab4b1aa 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -647,9 +647,6 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int for (int i = threadIdx.y; i < m_max*7; i+=blockDim.y){ int j = i%7; int m = i/7; -#ifdef HIP_PRINTF_WORKAROUND - if ( d_rhor_spline(0,m,j) < -55.65 ) printf("A: m=%d, j=%d, i = %d\n",m,j,i); -#endif A[m][j] = d_rhor_spline(0,m,j); } __syncthreads(); @@ -754,9 +751,6 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC::operator()(PairReaxComputeTorsion Date: Mon, 21 Jun 2021 11:50:57 -0400 Subject: [PATCH 
0061/1471] Update fmtlib to 8.0.0 --- src/fmt/args.h | 232 +++ src/fmt/chrono.h | 420 +++-- src/fmt/color.h | 120 +- src/fmt/compile.h | 834 +++++----- src/fmt/core.h | 2254 +++++++++++++++++-------- src/fmt/format-inl.h | 2044 +++++++++++------------ src/fmt/format.h | 3685 ++++++++++++++--------------------------- src/fmt/locale.h | 66 +- src/fmt/os.h | 181 +- src/fmt/ostream.h | 4 + src/fmt/printf.h | 467 ++---- src/fmt/ranges.h | 309 ++-- src/fmt/xchar.h | 236 +++ src/fmtlib_format.cpp | 39 +- src/fmtlib_os.cpp | 115 +- 15 files changed, 5575 insertions(+), 5431 deletions(-) create mode 100644 src/fmt/args.h create mode 100644 src/fmt/xchar.h diff --git a/src/fmt/args.h b/src/fmt/args.h new file mode 100644 index 0000000000..562e8ab111 --- /dev/null +++ b/src/fmt/args.h @@ -0,0 +1,232 @@ +// Formatting library for C++ - dynamic format arguments +// +// Copyright (c) 2012 - present, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_ARGS_H_ +#define FMT_ARGS_H_ + +#include // std::reference_wrapper +#include // std::unique_ptr +#include + +#include "core.h" + +FMT_BEGIN_NAMESPACE + +namespace detail { + +template struct is_reference_wrapper : std::false_type {}; +template +struct is_reference_wrapper> : std::true_type {}; + +template const T& unwrap(const T& v) { return v; } +template const T& unwrap(const std::reference_wrapper& v) { + return static_cast(v); +} + +class dynamic_arg_list { + // Workaround for clang's -Wweak-vtables. Unlike for regular classes, for + // templates it doesn't complain about inability to deduce single translation + // unit for placing vtable. So storage_node_base is made a fake template. 
+ template struct node { + virtual ~node() = default; + std::unique_ptr> next; + }; + + template struct typed_node : node<> { + T value; + + template + FMT_CONSTEXPR typed_node(const Arg& arg) : value(arg) {} + + template + FMT_CONSTEXPR typed_node(const basic_string_view& arg) + : value(arg.data(), arg.size()) {} + }; + + std::unique_ptr> head_; + + public: + template const T& push(const Arg& arg) { + auto new_node = std::unique_ptr>(new typed_node(arg)); + auto& value = new_node->value; + new_node->next = std::move(head_); + head_ = std::move(new_node); + return value; + } +}; +} // namespace detail + +/** + \rst + A dynamic version of `fmt::format_arg_store`. + It's equipped with a storage to potentially temporary objects which lifetimes + could be shorter than the format arguments object. + + It can be implicitly converted into `~fmt::basic_format_args` for passing + into type-erased formatting functions such as `~fmt::vformat`. + \endrst + */ +template +class dynamic_format_arg_store +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 + // Workaround a GCC template argument substitution bug. + : public basic_format_args +#endif +{ + private: + using char_type = typename Context::char_type; + + template struct need_copy { + static constexpr detail::type mapped_type = + detail::mapped_type_constant::value; + + enum { + value = !(detail::is_reference_wrapper::value || + std::is_same>::value || + std::is_same>::value || + (mapped_type != detail::type::cstring_type && + mapped_type != detail::type::string_type && + mapped_type != detail::type::custom_type)) + }; + }; + + template + using stored_type = conditional_t::value && + !has_formatter::value && + !detail::is_reference_wrapper::value, + std::basic_string, T>; + + // Storage of basic_format_arg must be contiguous. + std::vector> data_; + std::vector> named_info_; + + // Storage of arguments not fitting into basic_format_arg must grow + // without relocation because items in data_ refer to it. 
+ detail::dynamic_arg_list dynamic_args_; + + friend class basic_format_args; + + unsigned long long get_types() const { + return detail::is_unpacked_bit | data_.size() | + (named_info_.empty() + ? 0ULL + : static_cast(detail::has_named_args_bit)); + } + + const basic_format_arg* data() const { + return named_info_.empty() ? data_.data() : data_.data() + 1; + } + + template void emplace_arg(const T& arg) { + data_.emplace_back(detail::make_arg(arg)); + } + + template + void emplace_arg(const detail::named_arg& arg) { + if (named_info_.empty()) { + constexpr const detail::named_arg_info* zero_ptr{nullptr}; + data_.insert(data_.begin(), {zero_ptr, 0}); + } + data_.emplace_back(detail::make_arg(detail::unwrap(arg.value))); + auto pop_one = [](std::vector>* data) { + data->pop_back(); + }; + std::unique_ptr>, decltype(pop_one)> + guard{&data_, pop_one}; + named_info_.push_back({arg.name, static_cast(data_.size() - 2u)}); + data_[0].value_.named_args = {named_info_.data(), named_info_.size()}; + guard.release(); + } + + public: + /** + \rst + Adds an argument into the dynamic store for later passing to a formatting + function. + + Note that custom types and string types (but not string views) are copied + into the store dynamically allocating memory if necessary. + + **Example**:: + + fmt::dynamic_format_arg_store store; + store.push_back(42); + store.push_back("abc"); + store.push_back(1.5f); + std::string result = fmt::vformat("{} and {} and {}", store); + \endrst + */ + template void push_back(const T& arg) { + if (detail::const_check(need_copy::value)) + emplace_arg(dynamic_args_.push>(arg)); + else + emplace_arg(detail::unwrap(arg)); + } + + /** + \rst + Adds a reference to the argument into the dynamic store for later passing to + a formatting function. + + **Example**:: + + fmt::dynamic_format_arg_store store; + char band[] = "Rolling Stones"; + store.push_back(std::cref(band)); + band[9] = 'c'; // Changing str affects the output. 
+ std::string result = fmt::vformat("{}", store); + // result == "Rolling Scones" + \endrst + */ + template void push_back(std::reference_wrapper arg) { + static_assert( + need_copy::value, + "objects of built-in types and string views are always copied"); + emplace_arg(arg.get()); + } + + /** + Adds named argument into the dynamic store for later passing to a formatting + function. ``std::reference_wrapper`` is supported to avoid copying of the + argument. The name is always copied into the store. + */ + template + void push_back(const detail::named_arg& arg) { + const char_type* arg_name = + dynamic_args_.push>(arg.name).c_str(); + if (detail::const_check(need_copy::value)) { + emplace_arg( + fmt::arg(arg_name, dynamic_args_.push>(arg.value))); + } else { + emplace_arg(fmt::arg(arg_name, arg.value)); + } + } + + /** Erase all elements from the store */ + void clear() { + data_.clear(); + named_info_.clear(); + dynamic_args_ = detail::dynamic_arg_list(); + } + + /** + \rst + Reserves space to store at least *new_cap* arguments including + *new_cap_named* named arguments. + \endrst + */ + void reserve(size_t new_cap, size_t new_cap_named) { + FMT_ASSERT(new_cap >= new_cap_named, + "Set of arguments includes set of named arguments"); + data_.reserve(new_cap); + named_info_.reserve(new_cap_named); + } +}; + +FMT_END_NAMESPACE + +#endif // FMT_ARGS_H_ diff --git a/src/fmt/chrono.h b/src/fmt/chrono.h index 1a3b8d5e5c..c024fd710c 100644 --- a/src/fmt/chrono.h +++ b/src/fmt/chrono.h @@ -8,13 +8,13 @@ #ifndef FMT_CHRONO_H_ #define FMT_CHRONO_H_ +#include #include #include #include #include #include "format.h" -#include "locale.h" FMT_BEGIN_NAMESPACE @@ -282,13 +282,89 @@ To safe_duration_cast(std::chrono::duration from, #define FMT_NOMACRO namespace detail { +template struct null {}; inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); } inline null<> localtime_s(...) { return null<>(); } inline null<> gmtime_r(...) 
{ return null<>(); } inline null<> gmtime_s(...) { return null<>(); } + +inline auto do_write(const std::tm& time, const std::locale& loc, char format, + char modifier) -> std::string { + auto&& os = std::ostringstream(); + os.imbue(loc); + using iterator = std::ostreambuf_iterator; + const auto& facet = std::use_facet>(loc); + auto end = facet.put(os, os, ' ', &time, format, modifier); + if (end.failed()) FMT_THROW(format_error("failed to format time")); + auto str = os.str(); + if (!detail::is_utf8() || loc == std::locale::classic()) return str; + // char16_t and char32_t codecvts are broken in MSVC (linkage errors) and + // gcc-4. +#if FMT_MSC_VER != 0 || \ + (defined(__GLIBCXX__) && !defined(_GLIBCXX_USE_DUAL_ABI)) + // The _GLIBCXX_USE_DUAL_ABI macro is always defined in libstdc++ from gcc-5 + // and newer. + using code_unit = wchar_t; +#else + using code_unit = char32_t; +#endif + auto& f = std::use_facet>(loc); + auto mb = std::mbstate_t(); + const char* from_next = nullptr; + code_unit* to_next = nullptr; + constexpr size_t buf_size = 32; + code_unit buf[buf_size] = {}; + auto result = f.in(mb, str.data(), str.data() + str.size(), from_next, buf, + buf + buf_size, to_next); + if (result != std::codecvt_base::ok) + FMT_THROW(format_error("failed to format time")); + str.clear(); + for (code_unit* p = buf; p != to_next; ++p) { + uint32_t c = static_cast(*p); + if (sizeof(code_unit) == 2 && c >= 0xd800 && c <= 0xdfff) { + // surrogate pair + ++p; + if (p == to_next || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) { + FMT_THROW(format_error("failed to format time")); + } + c = (c << 10) + static_cast(*p) - 0x35fdc00; + } + if (c < 0x80) { + str.push_back(static_cast(c)); + } else if (c < 0x800) { + str.push_back(static_cast(0xc0 | (c >> 6))); + str.push_back(static_cast(0x80 | (c & 0x3f))); + } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { + str.push_back(static_cast(0xe0 | (c >> 12))); + str.push_back(static_cast(0x80 | ((c & 
0xfff) >> 6))); + str.push_back(static_cast(0x80 | (c & 0x3f))); + } else if (c >= 0x10000 && c <= 0x10ffff) { + str.push_back(static_cast(0xf0 | (c >> 18))); + str.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); + str.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + str.push_back(static_cast(0x80 | (c & 0x3f))); + } else { + FMT_THROW(format_error("failed to format time")); + } + } + return str; +} + +template +auto write(OutputIt out, const std::tm& time, const std::locale& loc, + char format, char modifier = 0) -> OutputIt { + auto str = do_write(time, loc, format, modifier); + return std::copy(str.begin(), str.end(), out); +} } // namespace detail -// Thread-safe replacement for std::localtime +FMT_MODULE_EXPORT_BEGIN + +/** + Converts given time since epoch as ``std::time_t`` value into calendar time, + expressed in local time. Unlike ``std::localtime``, this function is + thread-safe on most platforms. + */ inline std::tm localtime(std::time_t time) { struct dispatcher { std::time_t time_; @@ -330,7 +406,11 @@ inline std::tm localtime( return localtime(std::chrono::system_clock::to_time_t(time_point)); } -// Thread-safe replacement for std::gmtime +/** + Converts given time since epoch as ``std::time_t`` value into calendar time, + expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this + function is thread-safe on most platforms. 
+ */ inline std::tm gmtime(std::time_t time) { struct dispatcher { std::time_t time_; @@ -371,44 +451,84 @@ inline std::tm gmtime( return gmtime(std::chrono::system_clock::to_time_t(time_point)); } -namespace detail { +FMT_BEGIN_DETAIL_NAMESPACE + inline size_t strftime(char* str, size_t count, const char* format, const std::tm* time) { - return std::strftime(str, count, format, time); + // Assign to a pointer to suppress GCCs -Wformat-nonliteral + // First assign the nullptr to suppress -Wsuggest-attribute=format + std::size_t (*strftime)(char*, std::size_t, const char*, const std::tm*) = + nullptr; + strftime = std::strftime; + return strftime(str, count, format, time); } inline size_t strftime(wchar_t* str, size_t count, const wchar_t* format, const std::tm* time) { - return std::wcsftime(str, count, format, time); + // See above + std::size_t (*wcsftime)(wchar_t*, std::size_t, const wchar_t*, + const std::tm*) = nullptr; + wcsftime = std::wcsftime; + return wcsftime(str, count, format, time); } -} // namespace detail -template -struct formatter, Char> - : formatter { +FMT_END_DETAIL_NAMESPACE + +template +struct formatter, + Char> : formatter { + FMT_CONSTEXPR formatter() { + this->specs = {default_specs, sizeof(default_specs) / sizeof(Char)}; + } + + template + FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { + auto it = ctx.begin(); + if (it != ctx.end() && *it == ':') ++it; + auto end = it; + while (end != ctx.end() && *end != '}') ++end; + if (end != it) this->specs = {it, detail::to_unsigned(end - it)}; + return end; + } + template auto format(std::chrono::time_point val, FormatContext& ctx) -> decltype(ctx.out()) { std::tm time = localtime(val); return formatter::format(time, ctx); } + + static constexpr Char default_specs[] = {'%', 'Y', '-', '%', 'm', '-', + '%', 'd', ' ', '%', 'H', ':', + '%', 'M', ':', '%', 'S'}; }; +template +constexpr Char + formatter, + Char>::default_specs[]; + template struct formatter { template - auto 
parse(ParseContext& ctx) -> decltype(ctx.begin()) { + FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { auto it = ctx.begin(); if (it != ctx.end() && *it == ':') ++it; auto end = it; while (end != ctx.end() && *end != '}') ++end; - tm_format.reserve(detail::to_unsigned(end - it + 1)); - tm_format.append(it, end); - tm_format.push_back('\0'); + specs = {it, detail::to_unsigned(end - it)}; return end; } template - auto format(const std::tm& tm, FormatContext& ctx) -> decltype(ctx.out()) { + auto format(const std::tm& tm, FormatContext& ctx) const + -> decltype(ctx.out()) { + basic_memory_buffer tm_format; + tm_format.append(specs.begin(), specs.end()); + // By appending an extra space we can distinguish an empty result that + // indicates insufficient buffer size from a guaranteed non-empty result + // https://github.com/fmtlib/fmt/issues/2238 + tm_format.push_back(' '); + tm_format.push_back('\0'); basic_memory_buffer buf; size_t start = buf.size(); for (;;) { @@ -418,49 +538,40 @@ template struct formatter { buf.resize(start + count); break; } - if (size >= tm_format.size() * 256) { - // If the buffer is 256 times larger than the format string, assume - // that `strftime` gives an empty result. There doesn't seem to be a - // better way to distinguish the two cases: - // https://github.com/fmtlib/fmt/issues/367 - break; - } const size_t MIN_GROWTH = 10; buf.reserve(buf.capacity() + (size > MIN_GROWTH ? size : MIN_GROWTH)); } - return std::copy(buf.begin(), buf.end(), ctx.out()); + // Remove the extra space. 
+ return std::copy(buf.begin(), buf.end() - 1, ctx.out()); } - basic_memory_buffer tm_format; + basic_string_view specs; }; -namespace detail { -template FMT_CONSTEXPR const char* get_units() { +FMT_BEGIN_DETAIL_NAMESPACE + +template FMT_CONSTEXPR inline const char* get_units() { + if (std::is_same::value) return "as"; + if (std::is_same::value) return "fs"; + if (std::is_same::value) return "ps"; + if (std::is_same::value) return "ns"; + if (std::is_same::value) return "µs"; + if (std::is_same::value) return "ms"; + if (std::is_same::value) return "cs"; + if (std::is_same::value) return "ds"; + if (std::is_same>::value) return "s"; + if (std::is_same::value) return "das"; + if (std::is_same::value) return "hs"; + if (std::is_same::value) return "ks"; + if (std::is_same::value) return "Ms"; + if (std::is_same::value) return "Gs"; + if (std::is_same::value) return "Ts"; + if (std::is_same::value) return "Ps"; + if (std::is_same::value) return "Es"; + if (std::is_same>::value) return "m"; + if (std::is_same>::value) return "h"; return nullptr; } -template <> FMT_CONSTEXPR const char* get_units() { return "as"; } -template <> FMT_CONSTEXPR const char* get_units() { return "fs"; } -template <> FMT_CONSTEXPR const char* get_units() { return "ps"; } -template <> FMT_CONSTEXPR const char* get_units() { return "ns"; } -template <> FMT_CONSTEXPR const char* get_units() { return "µs"; } -template <> FMT_CONSTEXPR const char* get_units() { return "ms"; } -template <> FMT_CONSTEXPR const char* get_units() { return "cs"; } -template <> FMT_CONSTEXPR const char* get_units() { return "ds"; } -template <> FMT_CONSTEXPR const char* get_units>() { return "s"; } -template <> FMT_CONSTEXPR const char* get_units() { return "das"; } -template <> FMT_CONSTEXPR const char* get_units() { return "hs"; } -template <> FMT_CONSTEXPR const char* get_units() { return "ks"; } -template <> FMT_CONSTEXPR const char* get_units() { return "Ms"; } -template <> FMT_CONSTEXPR const char* get_units() { 
return "Gs"; } -template <> FMT_CONSTEXPR const char* get_units() { return "Ts"; } -template <> FMT_CONSTEXPR const char* get_units() { return "Ps"; } -template <> FMT_CONSTEXPR const char* get_units() { return "Es"; } -template <> FMT_CONSTEXPR const char* get_units>() { - return "m"; -} -template <> FMT_CONSTEXPR const char* get_units>() { - return "h"; -} enum class numeric_system { standard, @@ -626,33 +737,50 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, return ptr; } -struct chrono_format_checker { - FMT_NORETURN void report_no_date() { FMT_THROW(format_error("no date")); } +template struct null_chrono_spec_handler { + FMT_CONSTEXPR void unsupported() { + static_cast(this)->unsupported(); + } + FMT_CONSTEXPR void on_abbr_weekday() { unsupported(); } + FMT_CONSTEXPR void on_full_weekday() { unsupported(); } + FMT_CONSTEXPR void on_dec0_weekday(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_dec1_weekday(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_abbr_month() { unsupported(); } + FMT_CONSTEXPR void on_full_month() { unsupported(); } + FMT_CONSTEXPR void on_24_hour(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_12_hour(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_minute(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_second(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_datetime(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_loc_date(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_loc_time(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_us_date() { unsupported(); } + FMT_CONSTEXPR void on_iso_date() { unsupported(); } + FMT_CONSTEXPR void on_12_hour_time() { unsupported(); } + FMT_CONSTEXPR void on_24_hour_time() { unsupported(); } + FMT_CONSTEXPR void on_iso_time() { unsupported(); } + FMT_CONSTEXPR void on_am_pm() { unsupported(); } + FMT_CONSTEXPR void on_duration_value() { unsupported(); } + FMT_CONSTEXPR void on_duration_unit() 
{ unsupported(); } + FMT_CONSTEXPR void on_utc_offset() { unsupported(); } + FMT_CONSTEXPR void on_tz_name() { unsupported(); } +}; - template void on_text(const Char*, const Char*) {} - FMT_NORETURN void on_abbr_weekday() { report_no_date(); } - FMT_NORETURN void on_full_weekday() { report_no_date(); } - FMT_NORETURN void on_dec0_weekday(numeric_system) { report_no_date(); } - FMT_NORETURN void on_dec1_weekday(numeric_system) { report_no_date(); } - FMT_NORETURN void on_abbr_month() { report_no_date(); } - FMT_NORETURN void on_full_month() { report_no_date(); } - void on_24_hour(numeric_system) {} - void on_12_hour(numeric_system) {} - void on_minute(numeric_system) {} - void on_second(numeric_system) {} - FMT_NORETURN void on_datetime(numeric_system) { report_no_date(); } - FMT_NORETURN void on_loc_date(numeric_system) { report_no_date(); } - FMT_NORETURN void on_loc_time(numeric_system) { report_no_date(); } - FMT_NORETURN void on_us_date() { report_no_date(); } - FMT_NORETURN void on_iso_date() { report_no_date(); } - void on_12_hour_time() {} - void on_24_hour_time() {} - void on_iso_time() {} - void on_am_pm() {} - void on_duration_value() {} - void on_duration_unit() {} - FMT_NORETURN void on_utc_offset() { report_no_date(); } - FMT_NORETURN void on_tz_name() { report_no_date(); } +struct chrono_format_checker : null_chrono_spec_handler { + FMT_NORETURN void unsupported() { FMT_THROW(format_error("no date")); } + + template + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + FMT_CONSTEXPR void on_24_hour(numeric_system) {} + FMT_CONSTEXPR void on_12_hour(numeric_system) {} + FMT_CONSTEXPR void on_minute(numeric_system) {} + FMT_CONSTEXPR void on_second(numeric_system) {} + FMT_CONSTEXPR void on_12_hour_time() {} + FMT_CONSTEXPR void on_24_hour_time() {} + FMT_CONSTEXPR void on_iso_time() {} + FMT_CONSTEXPR void on_am_pm() {} + FMT_CONSTEXPR void on_duration_value() {} + FMT_CONSTEXPR void on_duration_unit() {} }; template ::value)> @@ -676,7 +804,8 
@@ inline bool isfinite(T value) { // Converts value to int and checks that it's in the range [0, upper). template ::value)> inline int to_nonnegative_int(T value, int upper) { - FMT_ASSERT(value >= 0 && value <= upper, "invalid value"); + FMT_ASSERT(value >= 0 && to_unsigned(value) <= to_unsigned(upper), + "invalid value"); (void)upper; return static_cast(value); } @@ -754,15 +883,21 @@ inline std::chrono::duration get_milliseconds( return std::chrono::duration(static_cast(ms)); } -template -OutputIt format_duration_value(OutputIt out, Rep val, int precision) { - const Char pr_f[] = {'{', ':', '.', '{', '}', 'f', '}', 0}; - if (precision >= 0) return format_to(out, pr_f, val, precision); - const Char fp_f[] = {'{', ':', 'g', '}', 0}; - const Char format[] = {'{', '}', 0}; - return format_to(out, std::is_floating_point::value ? fp_f : format, - val); +template ::value)> +OutputIt format_duration_value(OutputIt out, Rep val, int) { + return write(out, val); } + +template ::value)> +OutputIt format_duration_value(OutputIt out, Rep val, int precision) { + auto specs = basic_format_specs(); + specs.precision = precision; + specs.type = precision > 0 ? 
'f' : 'g'; + return write(out, val, specs); +} + template OutputIt copy_unit(string_view unit, OutputIt out, Char) { return std::copy(unit.begin(), unit.end(), out); @@ -780,10 +915,15 @@ template OutputIt format_duration_unit(OutputIt out) { if (const char* unit = get_units()) return copy_unit(string_view(unit), out, Char()); - const Char num_f[] = {'[', '{', '}', ']', 's', 0}; - if (const_check(Period::den == 1)) return format_to(out, num_f, Period::num); - const Char num_def_f[] = {'[', '{', '}', '/', '{', '}', ']', 's', 0}; - return format_to(out, num_def_f, Period::num, Period::den); + *out++ = '['; + out = write(out, Period::num); + if (const_check(Period::den != 1)) { + *out++ = '/'; + out = write(out, Period::den); + } + *out++ = ']'; + *out++ = 's'; + return out; } template ::value && sizeof(Rep) < sizeof(int), @@ -886,13 +1027,9 @@ struct chrono_formatter { void format_localized(const tm& time, char format, char modifier = 0) { if (isnan(val)) return write_nan(); - auto locale = context.locale().template get(); - auto& facet = std::use_facet>(locale); - std::basic_ostringstream os; - os.imbue(locale); - facet.put(os, os, ' ', &time, format, modifier); - auto str = os.str(); - std::copy(str.begin(), str.end(), out); + const auto& loc = localized ? context.locale().template get() + : std::locale::classic(); + out = detail::write(out, time, loc, format, modifier); } void on_text(const char_type* begin, const char_type* end) { @@ -1005,17 +1142,59 @@ struct chrono_formatter { out = format_duration_unit(out); } }; -} // namespace detail + +FMT_END_DETAIL_NAMESPACE + +#if defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907 +using weekday = std::chrono::weekday; +#else +// A fallback version of weekday. +class weekday { + private: + unsigned char value; + + public: + weekday() = default; + explicit constexpr weekday(unsigned wd) noexcept + : value(static_cast(wd != 7 ? 
wd : 0)) {} + constexpr unsigned c_encoding() const noexcept { return value; } +}; +#endif + +// A rudimentary weekday formatter. +template <> struct formatter { + private: + bool localized = false; + + public: + FMT_CONSTEXPR auto parse(format_parse_context& ctx) -> decltype(ctx.begin()) { + auto begin = ctx.begin(), end = ctx.end(); + if (begin != end && *begin == 'L') { + ++begin; + localized = true; + } + return begin; + } + + auto format(weekday wd, format_context& ctx) -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_wday = static_cast(wd.c_encoding()); + const auto& loc = localized ? ctx.locale().template get() + : std::locale::classic(); + return detail::write(ctx.out(), time, loc, 'a'); + } +}; template struct formatter, Char> { private: basic_format_specs specs; - int precision; + int precision = -1; using arg_ref_type = detail::arg_ref; arg_ref_type width_ref; arg_ref_type precision_ref; - mutable basic_string_view format_str; + bool localized = false; + basic_string_view format_str; using duration = std::chrono::duration; struct spec_handler { @@ -1038,17 +1217,21 @@ struct formatter, Char> { } void on_error(const char* msg) { FMT_THROW(format_error(msg)); } - void on_fill(basic_string_view fill) { f.specs.fill = fill; } - void on_align(align_t align) { f.specs.align = align; } - void on_width(int width) { f.specs.width = width; } - void on_precision(int _precision) { f.precision = _precision; } - void end_precision() {} + FMT_CONSTEXPR void on_fill(basic_string_view fill) { + f.specs.fill = fill; + } + FMT_CONSTEXPR void on_align(align_t align) { f.specs.align = align; } + FMT_CONSTEXPR void on_width(int width) { f.specs.width = width; } + FMT_CONSTEXPR void on_precision(int _precision) { + f.precision = _precision; + } + FMT_CONSTEXPR void end_precision() {} - template void on_dynamic_width(Id arg_id) { + template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) { f.width_ref = make_arg_ref(arg_id); } - template void on_dynamic_precision(Id 
arg_id) { + template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) { f.precision_ref = make_arg_ref(arg_id); } }; @@ -1073,13 +1256,15 @@ struct formatter, Char> { else handler.on_error("precision not allowed for this argument type"); } + if (begin != end && *begin == 'L') { + ++begin; + localized = true; + } end = parse_chrono_format(begin, end, detail::chrono_format_checker()); return {begin, end}; } public: - formatter() : precision(-1) {} - FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) -> decltype(ctx.begin()) { auto range = do_parse(ctx); @@ -1089,30 +1274,35 @@ struct formatter, Char> { } template - auto format(const duration& d, FormatContext& ctx) -> decltype(ctx.out()) { + auto format(const duration& d, FormatContext& ctx) const + -> decltype(ctx.out()) { + auto specs_copy = specs; + auto precision_copy = precision; auto begin = format_str.begin(), end = format_str.end(); // As a possible future optimization, we could avoid extra copying if width // is not specified. 
basic_memory_buffer buf; auto out = std::back_inserter(buf); - detail::handle_dynamic_spec(specs.width, width_ref, - ctx); - detail::handle_dynamic_spec(precision, + detail::handle_dynamic_spec(specs_copy.width, + width_ref, ctx); + detail::handle_dynamic_spec(precision_copy, precision_ref, ctx); if (begin == end || *begin == '}') { - out = detail::format_duration_value(out, d.count(), precision); + out = detail::format_duration_value(out, d.count(), precision_copy); detail::format_duration_unit(out); } else { detail::chrono_formatter f( ctx, out, d); - f.precision = precision; - parse_chrono_format(begin, end, f); + f.precision = precision_copy; + f.localized = localized; + detail::parse_chrono_format(begin, end, f); } return detail::write( - ctx.out(), basic_string_view(buf.data(), buf.size()), specs); + ctx.out(), basic_string_view(buf.data(), buf.size()), specs_copy); } }; +FMT_MODULE_EXPORT_END FMT_END_NAMESPACE #endif // FMT_CHRONO_H_ diff --git a/src/fmt/color.h b/src/fmt/color.h index 94e3419d1d..7fa5490e44 100644 --- a/src/fmt/color.h +++ b/src/fmt/color.h @@ -10,7 +10,15 @@ #include "format.h" +// __declspec(deprecated) is broken in some MSVC versions. +#if FMT_MSC_VER +# define FMT_DEPRECATED_NONMSVC +#else +# define FMT_DEPRECATED_NONMSVC FMT_DEPRECATED +#endif + FMT_BEGIN_NAMESPACE +FMT_MODULE_EXPORT_BEGIN enum class color : uint32_t { alice_blue = 0xF0F8FF, // rgb(240,248,255) @@ -198,7 +206,7 @@ struct rgb { uint8_t b; }; -namespace detail { +FMT_BEGIN_DETAIL_NAMESPACE // color is a struct of either a rgb color or a terminal color. struct color_type { @@ -221,9 +229,10 @@ struct color_type { uint32_t rgb_color; } value; }; -} // namespace detail -// Experimental text formatting support. +FMT_END_DETAIL_NAMESPACE + +/** A text style consisting of foreground and background colors and emphasis. 
*/ class text_style { public: FMT_CONSTEXPR text_style(emphasis em = emphasis()) FMT_NOEXCEPT @@ -260,33 +269,14 @@ class text_style { return lhs |= rhs; } - FMT_CONSTEXPR text_style& operator&=(const text_style& rhs) { - if (!set_foreground_color) { - set_foreground_color = rhs.set_foreground_color; - foreground_color = rhs.foreground_color; - } else if (rhs.set_foreground_color) { - if (!foreground_color.is_rgb || !rhs.foreground_color.is_rgb) - FMT_THROW(format_error("can't AND a terminal color")); - foreground_color.value.rgb_color &= rhs.foreground_color.value.rgb_color; - } - - if (!set_background_color) { - set_background_color = rhs.set_background_color; - background_color = rhs.background_color; - } else if (rhs.set_background_color) { - if (!background_color.is_rgb || !rhs.background_color.is_rgb) - FMT_THROW(format_error("can't AND a terminal color")); - background_color.value.rgb_color &= rhs.background_color.value.rgb_color; - } - - ems = static_cast(static_cast(ems) & - static_cast(rhs.ems)); - return *this; + FMT_DEPRECATED_NONMSVC FMT_CONSTEXPR text_style& operator&=( + const text_style& rhs) { + return and_assign(rhs); } - friend FMT_CONSTEXPR text_style operator&(text_style lhs, - const text_style& rhs) { - return lhs &= rhs; + FMT_DEPRECATED_NONMSVC friend FMT_CONSTEXPR text_style + operator&(text_style lhs, const text_style& rhs) { + return lhs.and_assign(rhs); } FMT_CONSTEXPR bool has_foreground() const FMT_NOEXCEPT { @@ -326,8 +316,34 @@ class text_style { } } + // DEPRECATED! 
+ FMT_CONSTEXPR text_style& and_assign(const text_style& rhs) { + if (!set_foreground_color) { + set_foreground_color = rhs.set_foreground_color; + foreground_color = rhs.foreground_color; + } else if (rhs.set_foreground_color) { + if (!foreground_color.is_rgb || !rhs.foreground_color.is_rgb) + FMT_THROW(format_error("can't AND a terminal color")); + foreground_color.value.rgb_color &= rhs.foreground_color.value.rgb_color; + } + + if (!set_background_color) { + set_background_color = rhs.set_background_color; + background_color = rhs.background_color; + } else if (rhs.set_background_color) { + if (!background_color.is_rgb || !rhs.background_color.is_rgb) + FMT_THROW(format_error("can't AND a terminal color")); + background_color.value.rgb_color &= rhs.background_color.value.rgb_color; + } + + ems = static_cast(static_cast(ems) & + static_cast(rhs.ems)); + return *this; + } + friend FMT_CONSTEXPR_DECL text_style fg(detail::color_type foreground) FMT_NOEXCEPT; + friend FMT_CONSTEXPR_DECL text_style bg(detail::color_type background) FMT_NOEXCEPT; @@ -338,19 +354,22 @@ class text_style { emphasis ems; }; -FMT_CONSTEXPR text_style fg(detail::color_type foreground) FMT_NOEXCEPT { - return text_style(/*is_foreground=*/true, foreground); +/** Creates a text style from the foreground (text) color. */ +FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) FMT_NOEXCEPT { + return text_style(true, foreground); } -FMT_CONSTEXPR text_style bg(detail::color_type background) FMT_NOEXCEPT { - return text_style(/*is_foreground=*/false, background); +/** Creates a text style from the background color. 
*/ +FMT_CONSTEXPR inline text_style bg(detail::color_type background) FMT_NOEXCEPT { + return text_style(false, background); } -FMT_CONSTEXPR text_style operator|(emphasis lhs, emphasis rhs) FMT_NOEXCEPT { +FMT_CONSTEXPR inline text_style operator|(emphasis lhs, + emphasis rhs) FMT_NOEXCEPT { return text_style(lhs) | rhs; } -namespace detail { +FMT_BEGIN_DETAIL_NAMESPACE template struct ansi_color_escape { FMT_CONSTEXPR ansi_color_escape(detail::color_type text_color, @@ -358,7 +377,7 @@ template struct ansi_color_escape { // If we have a terminal color, we need to output another escape code // sequence. if (!text_color.is_rgb) { - bool is_background = esc == detail::data::background_color; + bool is_background = esc == string_view("\x1b[48;2;"); uint32_t value = text_color.value.term_color; // Background ASCII codes are the same as the foreground ones but with // 10 more. @@ -411,7 +430,7 @@ template struct ansi_color_escape { FMT_CONSTEXPR operator const Char*() const FMT_NOEXCEPT { return buffer; } FMT_CONSTEXPR const Char* begin() const FMT_NOEXCEPT { return buffer; } - FMT_CONSTEXPR const Char* end() const FMT_NOEXCEPT { + FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const FMT_NOEXCEPT { return buffer + std::char_traits::length(buffer); } @@ -430,13 +449,13 @@ template struct ansi_color_escape { template FMT_CONSTEXPR ansi_color_escape make_foreground_color( detail::color_type foreground) FMT_NOEXCEPT { - return ansi_color_escape(foreground, detail::data::foreground_color); + return ansi_color_escape(foreground, "\x1b[38;2;"); } template FMT_CONSTEXPR ansi_color_escape make_background_color( detail::color_type background) FMT_NOEXCEPT { - return ansi_color_escape(background, detail::data::background_color); + return ansi_color_escape(background, "\x1b[48;2;"); } template @@ -455,18 +474,17 @@ inline void fputs(const wchar_t* chars, FILE* stream) FMT_NOEXCEPT { } template inline void reset_color(FILE* stream) FMT_NOEXCEPT { - fputs(detail::data::reset_color, 
stream); + fputs("\x1b[0m", stream); } template <> inline void reset_color(FILE* stream) FMT_NOEXCEPT { - fputs(detail::data::wreset_color, stream); + fputs(L"\x1b[0m", stream); } template inline void reset_color(buffer& buffer) FMT_NOEXCEPT { - const char* begin = data::reset_color; - const char* end = begin + sizeof(data::reset_color) - 1; - buffer.append(begin, end); + auto reset_color = string_view("\x1b[0m"); + buffer.append(reset_color.begin(), reset_color.end()); } template @@ -492,7 +510,8 @@ void vformat_to(buffer& buf, const text_style& ts, detail::vformat_to(buf, format_str, args); if (has_style) detail::reset_color(buf); } -} // namespace detail + +FMT_END_DETAIL_NAMESPACE template > void vprint(std::FILE* f, const text_style& ts, const S& format, @@ -523,11 +542,15 @@ void print(std::FILE* f, const text_style& ts, const S& format_str, } /** + \rst Formats a string and prints it to stdout using ANSI escape sequences to specify text formatting. - Example: + + **Example**:: + fmt::print(fmt::emphasis::bold | fg(fmt::color::red), "Elapsed time: {0:.2f} seconds", 1.23); + \endrst */ template ::value)> @@ -559,8 +582,8 @@ inline std::basic_string vformat( template > inline std::basic_string format(const text_style& ts, const S& format_str, const Args&... 
args) { - return vformat(ts, to_string_view(format_str), - fmt::make_args_checked(format_str, args...)); + return fmt::vformat(ts, to_string_view(format_str), + fmt::make_args_checked(format_str, args...)); } /** @@ -571,7 +594,7 @@ template format_str, basic_format_args>> args) { - decltype(detail::get_buffer(out)) buf(detail::get_buffer_init(out)); + auto&& buf = detail::get_buffer(out); detail::vformat_to(buf, ts, format_str, args); return detail::get_iterator(buf); } @@ -598,6 +621,7 @@ inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, fmt::make_args_checked(format_str, args...)); } +FMT_MODULE_EXPORT_END FMT_END_NAMESPACE #endif // FMT_COLOR_H_ diff --git a/src/fmt/compile.h b/src/fmt/compile.h index 3a33b02014..00000c92e3 100644 --- a/src/fmt/compile.h +++ b/src/fmt/compile.h @@ -8,13 +8,135 @@ #ifndef FMT_COMPILE_H_ #define FMT_COMPILE_H_ -#include - #include "format.h" FMT_BEGIN_NAMESPACE namespace detail { +// An output iterator that counts the number of objects written to it and +// discards them. +class counting_iterator { + private: + size_t count_; + + public: + using iterator_category = std::output_iterator_tag; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = void; + using _Unchecked_type = counting_iterator; // Mark iterator as checked. 
+ + struct value_type { + template void operator=(const T&) {} + }; + + counting_iterator() : count_(0) {} + + size_t count() const { return count_; } + + counting_iterator& operator++() { + ++count_; + return *this; + } + counting_iterator operator++(int) { + auto it = *this; + ++*this; + return it; + } + + friend counting_iterator operator+(counting_iterator it, difference_type n) { + it.count_ += static_cast(n); + return it; + } + + value_type operator*() const { return {}; } +}; + +template +inline counting_iterator copy_str(InputIt begin, InputIt end, + counting_iterator it) { + return it + (end - begin); +} + +template class truncating_iterator_base { + protected: + OutputIt out_; + size_t limit_; + size_t count_ = 0; + + truncating_iterator_base() : out_(), limit_(0) {} + + truncating_iterator_base(OutputIt out, size_t limit) + : out_(out), limit_(limit) {} + + public: + using iterator_category = std::output_iterator_tag; + using value_type = typename std::iterator_traits::value_type; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = void; + using _Unchecked_type = + truncating_iterator_base; // Mark iterator as checked. + + OutputIt base() const { return out_; } + size_t count() const { return count_; } +}; + +// An output iterator that truncates the output and counts the number of objects +// written to it. 
+template ::value_type>::type> +class truncating_iterator; + +template +class truncating_iterator + : public truncating_iterator_base { + mutable typename truncating_iterator_base::value_type blackhole_; + + public: + using value_type = typename truncating_iterator_base::value_type; + + truncating_iterator() = default; + + truncating_iterator(OutputIt out, size_t limit) + : truncating_iterator_base(out, limit) {} + + truncating_iterator& operator++() { + if (this->count_++ < this->limit_) ++this->out_; + return *this; + } + + truncating_iterator operator++(int) { + auto it = *this; + ++*this; + return it; + } + + value_type& operator*() const { + return this->count_ < this->limit_ ? *this->out_ : blackhole_; + } +}; + +template +class truncating_iterator + : public truncating_iterator_base { + public: + truncating_iterator() = default; + + truncating_iterator(OutputIt out, size_t limit) + : truncating_iterator_base(out, limit) {} + + template truncating_iterator& operator=(T val) { + if (this->count_++ < this->limit_) *this->out_++ = val; + return *this; + } + + truncating_iterator& operator++() { return *this; } + truncating_iterator& operator++(int) { return *this; } + truncating_iterator& operator*() { return *this; } +}; + // A compile-time string which is compiled into fast formatting code. class compiled_string {}; @@ -34,335 +156,29 @@ struct is_compiled_string : std::is_base_of {}; std::string s = fmt::format(FMT_COMPILE("{}"), 42); \endrst */ -#define FMT_COMPILE(s) FMT_STRING_IMPL(s, fmt::detail::compiled_string) +#ifdef __cpp_if_constexpr +# define FMT_COMPILE(s) \ + FMT_STRING_IMPL(s, fmt::detail::compiled_string, explicit) +#else +# define FMT_COMPILE(s) FMT_STRING(s) +#endif + +#if FMT_USE_NONTYPE_TEMPLATE_PARAMETERS +template Str> +struct udl_compiled_string : compiled_string { + using char_type = Char; + constexpr operator basic_string_view() const { + return {Str.data, N - 1}; + } +}; +#endif template const T& first(const T& value, const Tail&...) 
{ return value; } -// Part of a compiled format string. It can be either literal text or a -// replacement field. -template struct format_part { - enum class kind { arg_index, arg_name, text, replacement }; - - struct replacement { - arg_ref arg_id; - dynamic_format_specs specs; - }; - - kind part_kind; - union value { - int arg_index; - basic_string_view str; - replacement repl; - - FMT_CONSTEXPR value(int index = 0) : arg_index(index) {} - FMT_CONSTEXPR value(basic_string_view s) : str(s) {} - FMT_CONSTEXPR value(replacement r) : repl(r) {} - } val; - // Position past the end of the argument id. - const Char* arg_id_end = nullptr; - - FMT_CONSTEXPR format_part(kind k = kind::arg_index, value v = {}) - : part_kind(k), val(v) {} - - static FMT_CONSTEXPR format_part make_arg_index(int index) { - return format_part(kind::arg_index, index); - } - static FMT_CONSTEXPR format_part make_arg_name(basic_string_view name) { - return format_part(kind::arg_name, name); - } - static FMT_CONSTEXPR format_part make_text(basic_string_view text) { - return format_part(kind::text, text); - } - static FMT_CONSTEXPR format_part make_replacement(replacement repl) { - return format_part(kind::replacement, repl); - } -}; - -template struct part_counter { - unsigned num_parts = 0; - - FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { - if (begin != end) ++num_parts; - } - - FMT_CONSTEXPR int on_arg_id() { return ++num_parts, 0; } - FMT_CONSTEXPR int on_arg_id(int) { return ++num_parts, 0; } - FMT_CONSTEXPR int on_arg_id(basic_string_view) { - return ++num_parts, 0; - } - - FMT_CONSTEXPR void on_replacement_field(int, const Char*) {} - - FMT_CONSTEXPR const Char* on_format_specs(int, const Char* begin, - const Char* end) { - // Find the matching brace. 
- unsigned brace_counter = 0; - for (; begin != end; ++begin) { - if (*begin == '{') { - ++brace_counter; - } else if (*begin == '}') { - if (brace_counter == 0u) break; - --brace_counter; - } - } - return begin; - } - - FMT_CONSTEXPR void on_error(const char*) {} -}; - -// Counts the number of parts in a format string. -template -FMT_CONSTEXPR unsigned count_parts(basic_string_view format_str) { - part_counter counter; - parse_format_string(format_str, counter); - return counter.num_parts; -} - -template -class format_string_compiler : public error_handler { - private: - using part = format_part; - - PartHandler handler_; - part part_; - basic_string_view format_str_; - basic_format_parse_context parse_context_; - - public: - FMT_CONSTEXPR format_string_compiler(basic_string_view format_str, - PartHandler handler) - : handler_(handler), - format_str_(format_str), - parse_context_(format_str) {} - - FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { - if (begin != end) - handler_(part::make_text({begin, to_unsigned(end - begin)})); - } - - FMT_CONSTEXPR int on_arg_id() { - part_ = part::make_arg_index(parse_context_.next_arg_id()); - return 0; - } - - FMT_CONSTEXPR int on_arg_id(int id) { - parse_context_.check_arg_id(id); - part_ = part::make_arg_index(id); - return 0; - } - - FMT_CONSTEXPR int on_arg_id(basic_string_view id) { - part_ = part::make_arg_name(id); - return 0; - } - - FMT_CONSTEXPR void on_replacement_field(int, const Char* ptr) { - part_.arg_id_end = ptr; - handler_(part_); - } - - FMT_CONSTEXPR const Char* on_format_specs(int, const Char* begin, - const Char* end) { - auto repl = typename part::replacement(); - dynamic_specs_handler> handler( - repl.specs, parse_context_); - auto it = parse_format_specs(begin, end, handler); - if (*it != '}') on_error("missing '}' in format string"); - repl.arg_id = part_.part_kind == part::kind::arg_index - ? 
arg_ref(part_.val.arg_index) - : arg_ref(part_.val.str); - auto part = part::make_replacement(repl); - part.arg_id_end = begin; - handler_(part); - return it; - } -}; - -// Compiles a format string and invokes handler(part) for each parsed part. -template -FMT_CONSTEXPR void compile_format_string(basic_string_view format_str, - PartHandler handler) { - parse_format_string( - format_str, - format_string_compiler(format_str, handler)); -} - -template -void format_arg( - basic_format_parse_context& parse_ctx, - Context& ctx, Id arg_id) { - ctx.advance_to(visit_format_arg( - arg_formatter(ctx, &parse_ctx), - ctx.arg(arg_id))); -} - -// vformat_to is defined in a subnamespace to prevent ADL. -namespace cf { -template -auto vformat_to(OutputIt out, CompiledFormat& cf, - basic_format_args args) -> typename Context::iterator { - using char_type = typename Context::char_type; - basic_format_parse_context parse_ctx( - to_string_view(cf.format_str_)); - Context ctx(out, args); - - const auto& parts = cf.parts(); - for (auto part_it = std::begin(parts); part_it != std::end(parts); - ++part_it) { - const auto& part = *part_it; - const auto& value = part.val; - - using format_part_t = format_part; - switch (part.part_kind) { - case format_part_t::kind::text: { - const auto text = value.str; - auto output = ctx.out(); - auto&& it = reserve(output, text.size()); - it = std::copy_n(text.begin(), text.size(), it); - ctx.advance_to(output); - break; - } - - case format_part_t::kind::arg_index: - advance_to(parse_ctx, part.arg_id_end); - detail::format_arg(parse_ctx, ctx, value.arg_index); - break; - - case format_part_t::kind::arg_name: - advance_to(parse_ctx, part.arg_id_end); - detail::format_arg(parse_ctx, ctx, value.str); - break; - - case format_part_t::kind::replacement: { - const auto& arg_id_value = value.repl.arg_id.val; - const auto arg = value.repl.arg_id.kind == arg_id_kind::index - ? 
ctx.arg(arg_id_value.index) - : ctx.arg(arg_id_value.name); - - auto specs = value.repl.specs; - - handle_dynamic_spec(specs.width, specs.width_ref, ctx); - handle_dynamic_spec(specs.precision, - specs.precision_ref, ctx); - - error_handler h; - numeric_specs_checker checker(h, arg.type()); - if (specs.align == align::numeric) checker.require_numeric_argument(); - if (specs.sign != sign::none) checker.check_sign(); - if (specs.alt) checker.require_numeric_argument(); - if (specs.precision >= 0) checker.check_precision(); - - advance_to(parse_ctx, part.arg_id_end); - ctx.advance_to( - visit_format_arg(arg_formatter( - ctx, nullptr, &specs), - arg)); - break; - } - } - } - return ctx.out(); -} -} // namespace cf - -struct basic_compiled_format {}; - -template -struct compiled_format_base : basic_compiled_format { - using char_type = char_t; - using parts_container = std::vector>; - - parts_container compiled_parts; - - explicit compiled_format_base(basic_string_view format_str) { - compile_format_string(format_str, - [this](const format_part& part) { - compiled_parts.push_back(part); - }); - } - - const parts_container& parts() const { return compiled_parts; } -}; - -template struct format_part_array { - format_part data[N] = {}; - FMT_CONSTEXPR format_part_array() = default; -}; - -template -FMT_CONSTEXPR format_part_array compile_to_parts( - basic_string_view format_str) { - format_part_array parts; - unsigned counter = 0; - // This is not a lambda for compatibility with older compilers. - struct { - format_part* parts; - unsigned* counter; - FMT_CONSTEXPR void operator()(const format_part& part) { - parts[(*counter)++] = part; - } - } collector{parts.data, &counter}; - compile_format_string(format_str, collector); - if (counter < N) { - parts.data[counter] = - format_part::make_text(basic_string_view()); - } - return parts; -} - -template constexpr const T& constexpr_max(const T& a, const T& b) { - return (a < b) ? 
b : a; -} - -template -struct compiled_format_base::value>> - : basic_compiled_format { - using char_type = char_t; - - FMT_CONSTEXPR explicit compiled_format_base(basic_string_view) {} - -// Workaround for old compilers. Format string compilation will not be -// performed there anyway. -#if FMT_USE_CONSTEXPR - static FMT_CONSTEXPR_DECL const unsigned num_format_parts = - constexpr_max(count_parts(to_string_view(S())), 1u); -#else - static const unsigned num_format_parts = 1; -#endif - - using parts_container = format_part[num_format_parts]; - - const parts_container& parts() const { - static FMT_CONSTEXPR_DECL const auto compiled_parts = - compile_to_parts( - detail::to_string_view(S())); - return compiled_parts.data; - } -}; - -template -class compiled_format : private compiled_format_base { - public: - using typename compiled_format_base::char_type; - - private: - basic_string_view format_str_; - - template - friend auto cf::vformat_to(OutputIt out, CompiledFormat& cf, - basic_format_args args) -> - typename Context::iterator; - - public: - compiled_format() = delete; - explicit constexpr compiled_format(basic_string_view format_str) - : compiled_format_base(format_str), format_str_(format_str) {} -}; - #ifdef __cpp_if_constexpr template struct type_list {}; @@ -377,6 +193,12 @@ constexpr const auto& get([[maybe_unused]] const T& first, return get(rest...); } +template +constexpr int get_arg_index_by_name(basic_string_view name, + type_list) { + return get_arg_index_by_name(name); +} + template struct get_type_impl; template struct get_type_impl> { @@ -393,7 +215,7 @@ template struct text { using char_type = Char; template - OutputIt format(OutputIt out, const Args&...) const { + constexpr OutputIt format(OutputIt out, const Args&...) const { return write(out, data); } }; @@ -412,11 +234,22 @@ template struct code_unit { using char_type = Char; template - OutputIt format(OutputIt out, const Args&...) 
const { + constexpr OutputIt format(OutputIt out, const Args&...) const { return write(out, value); } }; +// This ensures that the argument type is convertible to `const T&`. +template +constexpr const T& get_arg_checked(const Args&... args) { + const auto& arg = get(args...); + if constexpr (detail::is_named_arg>()) { + return arg.value; + } else { + return arg; + } +} + template struct is_compiled_format> : std::true_type {}; @@ -425,29 +258,58 @@ template struct field { using char_type = Char; template - OutputIt format(OutputIt out, const Args&... args) const { - // This ensures that the argument type is convertile to `const T&`. - const T& arg = get(args...); - return write(out, arg); + constexpr OutputIt format(OutputIt out, const Args&... args) const { + return write(out, get_arg_checked(args...)); } }; template struct is_compiled_format> : std::true_type {}; +// A replacement field that refers to argument with name. +template struct runtime_named_field { + using char_type = Char; + basic_string_view name; + + template + constexpr static bool try_format_argument( + OutputIt& out, + // [[maybe_unused]] due to unused-but-set-parameter warning in GCC 7,8,9 + [[maybe_unused]] basic_string_view arg_name, const T& arg) { + if constexpr (is_named_arg::type>::value) { + if (arg_name == arg.name) { + out = write(out, arg.value); + return true; + } + } + return false; + } + + template + constexpr OutputIt format(OutputIt out, const Args&... args) const { + bool found = (try_format_argument(out, name, args) || ...); + if (!found) { + throw format_error("argument with specified name is not found"); + } + return out; + } +}; + +template +struct is_compiled_format> : std::true_type {}; + // A replacement field that refers to argument N and has format specifiers. template struct spec_field { using char_type = Char; - mutable formatter fmt; + formatter fmt; template - OutputIt format(OutputIt out, const Args&... 
args) const { - // This ensures that the argument type is convertile to `const T&`. - const T& arg = get(args...); + constexpr FMT_INLINE OutputIt format(OutputIt out, + const Args&... args) const { const auto& vargs = - make_format_args>(args...); + fmt::make_format_args>(args...); basic_format_context ctx(out, vargs); - return fmt.format(arg, ctx); + return fmt.format(get_arg_checked(args...), ctx); } }; @@ -460,7 +322,7 @@ template struct concat { using char_type = typename L::char_type; template - OutputIt format(OutputIt out, const Args&... args) const { + constexpr OutputIt format(OutputIt out, const Args&... args) const { out = lhs.format(out, args...); return rhs.format(out, args...); } @@ -508,14 +370,77 @@ template struct parse_specs_result { int next_arg_id; }; +constexpr int manual_indexing_id = -1; + template constexpr parse_specs_result parse_specs(basic_string_view str, - size_t pos, int arg_id) { + size_t pos, int next_arg_id) { str.remove_prefix(pos); - auto ctx = basic_format_parse_context(str, {}, arg_id + 1); + auto ctx = basic_format_parse_context(str, {}, next_arg_id); auto f = formatter(); auto end = f.parse(ctx); - return {f, pos + (end - str.data()) + 1, ctx.next_arg_id()}; + return {f, pos + fmt::detail::to_unsigned(end - str.data()) + 1, + next_arg_id == 0 ? 
manual_indexing_id : ctx.next_arg_id()}; +} + +template struct arg_id_handler { + arg_ref arg_id; + + constexpr int operator()() { + FMT_ASSERT(false, "handler cannot be used with automatic indexing"); + return 0; + } + constexpr int operator()(int id) { + arg_id = arg_ref(id); + return 0; + } + constexpr int operator()(basic_string_view id) { + arg_id = arg_ref(id); + return 0; + } + + constexpr void on_error(const char* message) { throw format_error(message); } +}; + +template struct parse_arg_id_result { + arg_ref arg_id; + const Char* arg_id_end; +}; + +template +constexpr auto parse_arg_id(const Char* begin, const Char* end) { + auto handler = arg_id_handler{arg_ref{}}; + auto arg_id_end = parse_arg_id(begin, end, handler); + return parse_arg_id_result{handler.arg_id, arg_id_end}; +} + +template struct field_type { + using type = remove_cvref_t; +}; + +template +struct field_type::value>> { + using type = remove_cvref_t; +}; + +template +constexpr auto parse_replacement_field_then_tail(S format_str) { + using char_type = typename S::char_type; + constexpr auto str = basic_string_view(format_str); + constexpr char_type c = END_POS != str.size() ? str[END_POS] : char_type(); + if constexpr (c == '}') { + return parse_tail( + field::type, ARG_INDEX>(), + format_str); + } else if constexpr (c == ':') { + constexpr auto result = parse_specs::type>( + str, END_POS + 1, NEXT_ID == manual_indexing_id ? 
0 : NEXT_ID); + return parse_tail( + spec_field::type, ARG_INDEX>{ + result.fmt}, + format_str); + } } // Compiles a non-empty format string and returns the compiled representation @@ -523,26 +448,58 @@ constexpr parse_specs_result parse_specs(basic_string_view str, template constexpr auto compile_format_string(S format_str) { using char_type = typename S::char_type; - constexpr basic_string_view str = format_str; + constexpr auto str = basic_string_view(format_str); if constexpr (str[POS] == '{') { - if (POS + 1 == str.size()) + if constexpr (POS + 1 == str.size()) throw format_error("unmatched '{' in format string"); if constexpr (str[POS + 1] == '{') { return parse_tail(make_text(str, POS, 1), format_str); - } else if constexpr (str[POS + 1] == '}') { - using type = get_type; - return parse_tail(field(), - format_str); - } else if constexpr (str[POS + 1] == ':') { - using type = get_type; - constexpr auto result = parse_specs(str, POS + 2, ID); - return parse_tail( - spec_field{result.fmt}, format_str); + } else if constexpr (str[POS + 1] == '}' || str[POS + 1] == ':') { + static_assert(ID != manual_indexing_id, + "cannot switch from manual to automatic argument indexing"); + constexpr auto next_id = + ID != manual_indexing_id ? ID + 1 : manual_indexing_id; + return parse_replacement_field_then_tail, Args, + POS + 1, ID, next_id>( + format_str); } else { - return unknown_format(); + constexpr auto arg_id_result = + parse_arg_id(str.data() + POS + 1, str.data() + str.size()); + constexpr auto arg_id_end_pos = arg_id_result.arg_id_end - str.data(); + constexpr char_type c = + arg_id_end_pos != str.size() ? 
str[arg_id_end_pos] : char_type(); + static_assert(c == '}' || c == ':', "missing '}' in format string"); + if constexpr (arg_id_result.arg_id.kind == arg_id_kind::index) { + static_assert( + ID == manual_indexing_id || ID == 0, + "cannot switch from automatic to manual argument indexing"); + constexpr auto arg_index = arg_id_result.arg_id.val.index; + return parse_replacement_field_then_tail, + Args, arg_id_end_pos, + arg_index, manual_indexing_id>( + format_str); + } else if constexpr (arg_id_result.arg_id.kind == arg_id_kind::name) { + constexpr auto arg_index = + get_arg_index_by_name(arg_id_result.arg_id.val.name, Args{}); + if constexpr (arg_index != invalid_arg_index) { + constexpr auto next_id = + ID != manual_indexing_id ? ID + 1 : manual_indexing_id; + return parse_replacement_field_then_tail< + decltype(get_type::value), Args, arg_id_end_pos, + arg_index, next_id>(format_str); + } else { + if constexpr (c == '}') { + return parse_tail( + runtime_named_field{arg_id_result.arg_id.val.name}, + format_str); + } else if constexpr (c == ':') { + return unknown_format(); // no type info for specs parsing + } + } + } } } else if constexpr (str[POS] == '}') { - if (POS + 1 == str.size()) + if constexpr (POS + 1 == str.size()) throw format_error("unmatched '}' in format string"); return parse_tail(make_text(str, POS, 1), format_str); } else { @@ -558,144 +515,125 @@ constexpr auto compile_format_string(S format_str) { } template ::value || - detail::is_compiled_string::value)> + FMT_ENABLE_IF(detail::is_compiled_string::value)> constexpr auto compile(S format_str) { - constexpr basic_string_view str = format_str; + constexpr auto str = basic_string_view(format_str); if constexpr (str.size() == 0) { return detail::make_text(str, 0, 0); } else { constexpr auto result = detail::compile_format_string, 0, 0>( format_str); - if constexpr (std::is_same, - detail::unknown_format>()) { - return detail::compiled_format(to_string_view(format_str)); - } else { - return 
result; - } + return result; } } -#else -template ::value)> -constexpr auto compile(S format_str) -> detail::compiled_format { - return detail::compiled_format(to_string_view(format_str)); -} #endif // __cpp_if_constexpr - -// Compiles the format string which must be a string literal. -template -auto compile(const Char (&format_str)[N]) - -> detail::compiled_format { - return detail::compiled_format( - basic_string_view(format_str, N - 1)); -} } // namespace detail -// DEPRECATED! use FMT_COMPILE instead. -template -FMT_DEPRECATED auto compile(const Args&... args) - -> decltype(detail::compile(args...)) { - return detail::compile(args...); -} +FMT_MODULE_EXPORT_BEGIN -#if FMT_USE_CONSTEXPR -# ifdef __cpp_if_constexpr +#ifdef __cpp_if_constexpr template ::value)> FMT_INLINE std::basic_string format(const CompiledFormat& cf, const Args&... args) { - basic_memory_buffer buffer; - cf.format(detail::buffer_appender(buffer), args...); - return to_string(buffer); + auto s = std::basic_string(); + cf.format(std::back_inserter(s), args...); + return s; } template ::value)> -OutputIt format_to(OutputIt out, const CompiledFormat& cf, - const Args&... args) { +constexpr FMT_INLINE OutputIt format_to(OutputIt out, const CompiledFormat& cf, + const Args&... args) { return cf.format(out, args...); } -# endif // __cpp_if_constexpr -#endif // FMT_USE_CONSTEXPR - -template ::value)> -std::basic_string format(const CompiledFormat& cf, const Args&... args) { - basic_memory_buffer buffer; - using context = buffer_context; - detail::cf::vformat_to(detail::buffer_appender(buffer), cf, - make_format_args(args...)); - return to_string(buffer); -} template ::value)> FMT_INLINE std::basic_string format(const S&, Args&&... 
args) { -#ifdef __cpp_if_constexpr if constexpr (std::is_same::value) { - constexpr basic_string_view str = S(); - if (str.size() == 2 && str[0] == '{' && str[1] == '}') - return fmt::to_string(detail::first(args...)); + constexpr auto str = basic_string_view(S()); + if constexpr (str.size() == 2 && str[0] == '{' && str[1] == '}') { + const auto& first = detail::first(args...); + if constexpr (detail::is_named_arg< + remove_cvref_t>::value) { + return fmt::to_string(first.value); + } else { + return fmt::to_string(first); + } + } } + constexpr auto compiled = detail::compile(S()); + if constexpr (std::is_same, + detail::unknown_format>()) { + return format(static_cast>(S()), + std::forward(args)...); + } else { + return format(compiled, std::forward(args)...); + } +} + +template ::value)> +FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) { + constexpr auto compiled = detail::compile(S()); + if constexpr (std::is_same, + detail::unknown_format>()) { + return format_to(out, + static_cast>(S()), + std::forward(args)...); + } else { + return format_to(out, compiled, std::forward(args)...); + } +} #endif - constexpr auto compiled = detail::compile(S()); - return format(compiled, std::forward(args)...); -} - -template ::value)> -OutputIt format_to(OutputIt out, const CompiledFormat& cf, - const Args&... args) { - using char_type = typename CompiledFormat::char_type; - using context = format_context_t; - return detail::cf::vformat_to(out, cf, - make_format_args(args...)); -} template ::value)> -OutputIt format_to(OutputIt out, const S&, const Args&... args) { - constexpr auto compiled = detail::compile(S()); - return format_to(out, compiled, args...); -} - -template -auto format_to_n(OutputIt out, size_t n, const CompiledFormat& cf, - const Args&... 
args) -> - typename std::enable_if< - detail::is_output_iterator::value && - std::is_base_of::value, - format_to_n_result>::type { - auto it = - format_to(detail::truncating_iterator(out, n), cf, args...); +format_to_n_result format_to_n(OutputIt out, size_t n, + const S& format_str, Args&&... args) { + auto it = format_to(detail::truncating_iterator(out, n), format_str, + std::forward(args)...); return {it.base(), it.count()}; } -template ::value)> -format_to_n_result format_to_n(OutputIt out, size_t n, const S&, - const Args&... args) { - constexpr auto compiled = detail::compile(S()); - auto it = format_to(detail::truncating_iterator(out, n), compiled, - args...); - return {it.base(), it.count()}; +size_t formatted_size(const S& format_str, const Args&... args) { + return format_to(detail::counting_iterator(), format_str, args...).count(); } -template -size_t formatted_size(const CompiledFormat& cf, const Args&... args) { - return format_to(detail::counting_iterator(), cf, args...).count(); +template ::value)> +void print(std::FILE* f, const S& format_str, const Args&... args) { + memory_buffer buffer; + format_to(std::back_inserter(buffer), format_str, args...); + detail::print(f, {buffer.data(), buffer.size()}); } +template ::value)> +void print(const S& format_str, const Args&... 
args) { + print(stdout, format_str, args...); +} + +#if FMT_USE_NONTYPE_TEMPLATE_PARAMETERS +inline namespace literals { +template +constexpr detail::udl_compiled_string< + remove_cvref_t, + sizeof(Str.data) / sizeof(decltype(Str.data[0])), Str> +operator""_cf() { + return {}; +} +} // namespace literals +#endif + +FMT_MODULE_EXPORT_END FMT_END_NAMESPACE #endif // FMT_COMPILE_H_ diff --git a/src/fmt/core.h b/src/fmt/core.h index 7946921d8e..4c1f5e2c4b 100644 --- a/src/fmt/core.h +++ b/src/fmt/core.h @@ -1,4 +1,4 @@ -// Formatting library for C++ - the core API +// Formatting library for C++ - the core API for char/UTF-8 // // Copyright (c) 2012 - present, Victor Zverovich // All rights reserved. @@ -10,17 +10,15 @@ #include // std::FILE #include -#include #include -#include +#include #include #include -#include // The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 70103 +#define FMT_VERSION 80000 -#if defined (__clang__ ) && !defined(__ibmxl__) +#ifdef __clang__ # define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) #else # define FMT_CLANG_VERSION 0 @@ -28,14 +26,10 @@ #if defined(__GNUC__) && !defined(__clang__) # define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FMT_GCC_PRAGMA(arg) _Pragma(arg) #else # define FMT_GCC_VERSION 0 -#endif - -#if defined(__INTEL_COMPILER) -# define FMT_ICC_VERSION __INTEL_COMPILER -#else -# define FMT_ICC_VERSION 0 +# define FMT_GCC_PRAGMA(arg) #endif #if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) @@ -44,6 +38,12 @@ # define FMT_HAS_GXX_CXX11 0 #endif +#if defined(__INTEL_COMPILER) +# define FMT_ICC_VERSION __INTEL_COMPILER +#else +# define FMT_ICC_VERSION 0 +#endif + #ifdef __NVCC__ # define FMT_NVCC __NVCC__ #else @@ -52,10 +52,10 @@ #ifdef _MSC_VER # define FMT_MSC_VER _MSC_VER -# define FMT_SUPPRESS_MSC_WARNING(n) __pragma(warning(suppress : n)) +# define FMT_MSC_WARNING(...) 
__pragma(warning(__VA_ARGS__)) #else # define FMT_MSC_VER 0 -# define FMT_SUPPRESS_MSC_WARNING(n) +# define FMT_MSC_WARNING(...) #endif #ifdef __has_feature @@ -95,10 +95,26 @@ # define FMT_CONSTEXPR constexpr # define FMT_CONSTEXPR_DECL constexpr #else -# define FMT_CONSTEXPR inline +# define FMT_CONSTEXPR # define FMT_CONSTEXPR_DECL #endif +// Check if constexpr std::char_traits<>::compare,length is supported. +#if defined(__GLIBCXX__) +# if __cplusplus >= 201703L && defined(_GLIBCXX_RELEASE) && \ + _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +# endif +#elif defined(_LIBCPP_VERSION) && __cplusplus >= 201703L && \ + _LIBCPP_VERSION >= 4000 +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +#elif FMT_MSC_VER >= 1914 && _MSVC_LANG >= 201703L +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +#endif +#ifndef FMT_CONSTEXPR_CHAR_TRAITS +# define FMT_CONSTEXPR_CHAR_TRAITS +#endif + #ifndef FMT_OVERRIDE # if FMT_HAS_FEATURE(cxx_override_control) || \ (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 @@ -149,25 +165,40 @@ # define FMT_NORETURN #endif -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900 -# define FMT_DEPRECATED [[deprecated]] +#ifndef FMT_MAYBE_UNUSED +# if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) +# define FMT_MAYBE_UNUSED [[maybe_unused]] # else -# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VER -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif +# define FMT_MAYBE_UNUSED # endif #endif -// Workaround broken [[deprecated]] in the Intel, PGI and NVCC compilers. 
-#if FMT_ICC_VERSION || defined(__PGI) || FMT_NVCC -# define FMT_DEPRECATED_ALIAS +#if __cplusplus == 201103L || __cplusplus == 201402L +# if defined(__INTEL_COMPILER) || defined(__PGI) +# define FMT_FALLTHROUGH +# elif defined(__clang__) +# define FMT_FALLTHROUGH [[clang::fallthrough]] +# elif FMT_GCC_VERSION >= 700 && \ + (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) +# define FMT_FALLTHROUGH [[gnu::fallthrough]] +# else +# define FMT_FALLTHROUGH +# endif +#elif FMT_HAS_CPP17_ATTRIBUTE(fallthrough) || \ + (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +# define FMT_FALLTHROUGH [[fallthrough]] #else -# define FMT_DEPRECATED_ALIAS FMT_DEPRECATED +# define FMT_FALLTHROUGH +#endif + +#ifndef FMT_USE_FLOAT +# define FMT_USE_FLOAT 1 +#endif +#ifndef FMT_USE_DOUBLE +# define FMT_USE_DOUBLE 1 +#endif +#ifndef FMT_USE_LONG_DOUBLE +# define FMT_USE_LONG_DOUBLE 1 #endif #ifndef FMT_INLINE @@ -180,19 +211,13 @@ #ifndef FMT_USE_INLINE_NAMESPACES # if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ - (FMT_MSC_VER >= 1900 && !_MANAGED) + (FMT_MSC_VER >= 1900 && (!defined(_MANAGED) || !_MANAGED)) # define FMT_USE_INLINE_NAMESPACES 1 # else # define FMT_USE_INLINE_NAMESPACES 0 # endif #endif -// LAMMPS customization -// use 'v7_lmp' namespace instead of 'v7' so that our -// bundled copy does not collide with linking other code -// using system wide installations which may be using -// a different version. 
- #ifndef FMT_BEGIN_NAMESPACE # if FMT_USE_INLINE_NAMESPACES # define FMT_INLINE_NAMESPACE inline namespace @@ -203,41 +228,45 @@ # define FMT_INLINE_NAMESPACE namespace # define FMT_END_NAMESPACE \ } \ - using namespace v7_lmp; \ + using namespace v7; \ } # endif # define FMT_BEGIN_NAMESPACE \ namespace fmt { \ - FMT_INLINE_NAMESPACE v7_lmp { + FMT_INLINE_NAMESPACE v7 { +#endif + +#ifndef FMT_MODULE_EXPORT +# define FMT_MODULE_EXPORT +# define FMT_MODULE_EXPORT_BEGIN +# define FMT_MODULE_EXPORT_END +# define FMT_BEGIN_DETAIL_NAMESPACE namespace detail { +# define FMT_END_DETAIL_NAMESPACE } #endif #if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# define FMT_CLASS_API FMT_SUPPRESS_MSC_WARNING(4275) +# define FMT_CLASS_API FMT_MSC_WARNING(suppress : 4275) # ifdef FMT_EXPORT # define FMT_API __declspec(dllexport) -# define FMT_EXTERN_TEMPLATE_API FMT_API -# define FMT_EXPORTED # elif defined(FMT_SHARED) # define FMT_API __declspec(dllimport) -# define FMT_EXTERN_TEMPLATE_API FMT_API # endif #else # define FMT_CLASS_API +# if defined(FMT_EXPORT) || defined(FMT_SHARED) +# if defined(__GNUC__) || defined(__clang__) +# define FMT_API __attribute__((visibility("default"))) +# endif +# endif #endif #ifndef FMT_API # define FMT_API #endif -#ifndef FMT_EXTERN_TEMPLATE_API -# define FMT_EXTERN_TEMPLATE_API -#endif -#ifndef FMT_INSTANTIATION_DEF_API -# define FMT_INSTANTIATION_DEF_API FMT_API -#endif -#ifndef FMT_HEADER_ONLY -# define FMT_EXTERN extern +#if FMT_GCC_VERSION +# define FMT_GCC_VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) #else -# define FMT_EXTERN +# define FMT_GCC_VISIBILITY_HIDDEN #endif // libc++ supports string_view in pre-c++17. 
@@ -254,11 +283,37 @@ #ifndef FMT_UNICODE # define FMT_UNICODE !FMT_MSC_VER #endif -#if FMT_UNICODE && FMT_MSC_VER -# pragma execution_character_set("utf-8") + +#ifndef FMT_CONSTEVAL +# if ((FMT_GCC_VERSION >= 1000 || FMT_CLANG_VERSION >= 1101) && \ + __cplusplus > 201703L) || \ + (defined(__cpp_consteval) && \ + !FMT_MSC_VER) // consteval is broken in MSVC. +# define FMT_CONSTEVAL consteval +# define FMT_HAS_CONSTEVAL +# else +# define FMT_CONSTEVAL +# endif +#endif + +#ifndef FMT_USE_NONTYPE_TEMPLATE_PARAMETERS +# if defined(__cpp_nontype_template_args) && \ + ((FMT_GCC_VERSION >= 903 && __cplusplus >= 201709L) || \ + __cpp_nontype_template_args >= 201911L) +# define FMT_USE_NONTYPE_TEMPLATE_PARAMETERS 1 +# else +# define FMT_USE_NONTYPE_TEMPLATE_PARAMETERS 0 +# endif +#endif + +// Enable minimal optimizations for more compact code in debug mode. +FMT_GCC_PRAGMA("GCC push_options") +#ifndef __OPTIMIZE__ +FMT_GCC_PRAGMA("GCC optimize(\"Og\")") #endif FMT_BEGIN_NAMESPACE +FMT_MODULE_EXPORT_BEGIN // Implementations of enable_if_t and other metafunctions for older systems. template @@ -269,23 +324,35 @@ template using bool_constant = std::integral_constant; template using remove_reference_t = typename std::remove_reference::type; template -using remove_const_t = typename std::remove_const::type; -template using remove_cvref_t = typename std::remove_cv>::type; template struct type_identity { using type = T; }; template using type_identity_t = typename type_identity::type; -struct monostate {}; +struct monostate { + constexpr monostate() {} +}; // An enable_if helper to be used in template parameters which results in much // shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed // to workaround a bug in MSVC 2019 (see #1140 and #1186). -#define FMT_ENABLE_IF(...) enable_if_t<(__VA_ARGS__), int> = 0 +#ifdef FMT_DOC +# define FMT_ENABLE_IF(...) +#else +# define FMT_ENABLE_IF(...) 
enable_if_t<(__VA_ARGS__), int> = 0 +#endif -namespace detail { +FMT_BEGIN_DETAIL_NAMESPACE -// A helper function to suppress "conditional expression is constant" warnings. -template constexpr T const_check(T value) { return value; } +constexpr FMT_INLINE auto is_constant_evaluated() FMT_NOEXCEPT -> bool { +#ifdef __cpp_lib_is_constant_evaluated + return std::is_constant_evaluated(); +#else + return false; +#endif +} + +// A function to suppress "conditional expression is constant" warnings. +template constexpr auto const_check(T value) -> T { return value; } FMT_NORETURN FMT_API void assert_fail(const char* file, int line, const char* message); @@ -318,38 +385,39 @@ template struct std_string_view {}; # define FMT_USE_INT128 1 using int128_t = __int128_t; using uint128_t = __uint128_t; +template inline auto convert_for_visit(T value) -> T { + return value; +} #else # define FMT_USE_INT128 0 #endif #if !FMT_USE_INT128 -struct int128_t {}; -struct uint128_t {}; +enum class int128_t {}; +enum class uint128_t {}; +// Reduce template instantiations. +template inline auto convert_for_visit(T) -> monostate { + return {}; +} #endif // Casts a nonnegative integer to unsigned. template -FMT_CONSTEXPR typename std::make_unsigned::type to_unsigned(Int value) { +FMT_CONSTEXPR auto to_unsigned(Int value) -> + typename std::make_unsigned::type { FMT_ASSERT(value >= 0, "negative value"); return static_cast::type>(value); } -FMT_SUPPRESS_MSC_WARNING(4566) constexpr unsigned char micro[] = "\u00B5"; +FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char micro[] = "\u00B5"; -template constexpr bool is_unicode() { - return FMT_UNICODE || sizeof(Char) != 1 || - (sizeof(micro) == 3 && micro[0] == 0xC2 && micro[1] == 0xB5); +constexpr auto is_utf8() -> bool { + // Avoid buggy sign extensions in MSVC's constant evaluation mode. 
+ // https://developercommunity.visualstudio.com/t/C-difference-in-behavior-for-unsigned/1233612 + using uchar = unsigned char; + return FMT_UNICODE || (sizeof(micro) == 3 && uchar(micro[0]) == 0xC2 && + uchar(micro[1]) == 0xB5); } - -#ifdef __cpp_char8_t -using char8_type = char8_t; -#else -enum char8_type : unsigned char {}; -#endif -} // namespace detail - -#ifdef FMT_USE_INTERNAL -namespace internal = detail; // DEPRECATED -#endif +FMT_END_DETAIL_NAMESPACE /** An implementation of ``std::basic_string_view`` for pre-C++17. It provides a @@ -380,11 +448,15 @@ template class basic_string_view { the size with ``std::char_traits::length``. \endrst */ -#if __cplusplus >= 201703L // C++17's char_traits::length() is constexpr. - FMT_CONSTEXPR -#endif - basic_string_view(const Char* s) - : data_(s), size_(std::char_traits::length(s)) {} + FMT_CONSTEXPR_CHAR_TRAITS + FMT_INLINE + basic_string_view(const Char* s) : data_(s) { + if (detail::const_check(std::is_same::value && + !detail::is_constant_evaluated())) + size_ = std::strlen(reinterpret_cast(s)); + else + size_ = std::char_traits::length(s); + } /** Constructs a string reference from a ``std::basic_string`` object. */ template @@ -399,15 +471,17 @@ template class basic_string_view { size_(s.size()) {} /** Returns a pointer to the string data. */ - constexpr const Char* data() const { return data_; } + constexpr auto data() const -> const Char* { return data_; } /** Returns the string size. 
*/ - constexpr size_t size() const { return size_; } + constexpr auto size() const -> size_t { return size_; } - constexpr iterator begin() const { return data_; } - constexpr iterator end() const { return data_ + size_; } + constexpr auto begin() const -> iterator { return data_; } + constexpr auto end() const -> iterator { return data_ + size_; } - constexpr const Char& operator[](size_t pos) const { return data_[pos]; } + constexpr auto operator[](size_t pos) const -> const Char& { + return data_[pos]; + } FMT_CONSTEXPR void remove_prefix(size_t n) { data_ += n; @@ -415,7 +489,7 @@ template class basic_string_view { } // Lexicographically compare this string reference to other. - int compare(basic_string_view other) const { + FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { size_t str_size = size_ < other.size_ ? size_ : other.size_; int result = std::char_traits::compare(data_, other.data_, str_size); if (result == 0) @@ -423,36 +497,33 @@ template class basic_string_view { return result; } - friend bool operator==(basic_string_view lhs, basic_string_view rhs) { + FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, + basic_string_view rhs) + -> bool { return lhs.compare(rhs) == 0; } - friend bool operator!=(basic_string_view lhs, basic_string_view rhs) { + friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { return lhs.compare(rhs) != 0; } - friend bool operator<(basic_string_view lhs, basic_string_view rhs) { + friend auto operator<(basic_string_view lhs, basic_string_view rhs) -> bool { return lhs.compare(rhs) < 0; } - friend bool operator<=(basic_string_view lhs, basic_string_view rhs) { + friend auto operator<=(basic_string_view lhs, basic_string_view rhs) -> bool { return lhs.compare(rhs) <= 0; } - friend bool operator>(basic_string_view lhs, basic_string_view rhs) { + friend auto operator>(basic_string_view lhs, basic_string_view rhs) -> bool { return lhs.compare(rhs) > 0; } - 
friend bool operator>=(basic_string_view lhs, basic_string_view rhs) { + friend auto operator>=(basic_string_view lhs, basic_string_view rhs) -> bool { return lhs.compare(rhs) >= 0; } }; using string_view = basic_string_view; -using wstring_view = basic_string_view; /** Specifies if ``T`` is a character type. Can be specialized by users. */ template struct is_char : std::false_type {}; template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; /** \rst @@ -471,24 +542,26 @@ template <> struct is_char : std::true_type {}; \endrst */ template ::value)> -inline basic_string_view to_string_view(const Char* s) { +FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { return s; } template -inline basic_string_view to_string_view( - const std::basic_string& s) { +inline auto to_string_view(const std::basic_string& s) + -> basic_string_view { return s; } template -inline basic_string_view to_string_view(basic_string_view s) { +constexpr auto to_string_view(basic_string_view s) + -> basic_string_view { return s; } template >::value)> -inline basic_string_view to_string_view(detail::std_string_view s) { +inline auto to_string_view(detail::std_string_view s) + -> basic_string_view { return s; } @@ -500,15 +573,15 @@ template struct is_compile_string : std::is_base_of {}; template ::value)> -constexpr basic_string_view to_string_view(const S& s) { - return s; +constexpr auto to_string_view(const S& s) + -> basic_string_view { + return basic_string_view(s); } -// LAMMPS customization using 'v7_lmp' instead of 'v7' +FMT_BEGIN_DETAIL_NAMESPACE -namespace detail { void to_string_view(...); -using fmt::v7_lmp::to_string_view; +using fmt::v7::to_string_view; // Specifies whether S is a string type convertible to fmt::basic_string_view. 
// It should be a constexpr function but MSVC 2017 fails to compile it in @@ -542,7 +615,7 @@ struct error_handler { // This function is intentionally not constexpr to give a compile-time error. FMT_NORETURN FMT_API void on_error(const char* message); }; -} // namespace detail +FMT_END_DETAIL_NAMESPACE /** String's character type. */ template using char_t = typename detail::char_t_impl::type; @@ -551,16 +624,7 @@ template using char_t = typename detail::char_t_impl::type; \rst Parsing context consisting of a format string range being parsed and an argument counter for automatic indexing. - - You can use one of the following type aliases for common character types: - - +-----------------------+-------------------------------------+ - | Type | Definition | - +=======================+=====================================+ - | format_parse_context | basic_format_parse_context | - +-----------------------+-------------------------------------+ - | wformat_parse_context | basic_format_parse_context | - +-----------------------+-------------------------------------+ + You can use the ```format_parse_context`` type alias for ``char`` instead. \endrst */ template @@ -582,12 +646,16 @@ class basic_format_parse_context : private ErrorHandler { Returns an iterator to the beginning of the format string range being parsed. */ - constexpr iterator begin() const FMT_NOEXCEPT { return format_str_.begin(); } + constexpr auto begin() const FMT_NOEXCEPT -> iterator { + return format_str_.begin(); + } /** Returns an iterator past the end of the format string range being parsed. */ - constexpr iterator end() const FMT_NOEXCEPT { return format_str_.end(); } + constexpr auto end() const FMT_NOEXCEPT -> iterator { + return format_str_.end(); + } /** Advances the begin iterator to ``it``. 
*/ FMT_CONSTEXPR void advance_to(iterator it) { @@ -598,7 +666,7 @@ class basic_format_parse_context : private ErrorHandler { Reports an error if using the manual argument indexing; otherwise returns the next argument index and switches to the automatic indexing. */ - FMT_CONSTEXPR int next_arg_id() { + FMT_CONSTEXPR auto next_arg_id() -> int { // Don't check if the argument id is valid to avoid overhead and because it // will be checked during formatting anyway. if (next_arg_id_ >= 0) return next_arg_id_++; @@ -623,11 +691,10 @@ class basic_format_parse_context : private ErrorHandler { ErrorHandler::on_error(message); } - constexpr ErrorHandler error_handler() const { return *this; } + constexpr auto error_handler() const -> ErrorHandler { return *this; } }; using format_parse_context = basic_format_parse_context; -using wformat_parse_context = basic_format_parse_context; template class basic_format_arg; template class basic_format_args; @@ -651,11 +718,14 @@ template struct is_contiguous : std::false_type {}; template struct is_contiguous> : std::true_type {}; -namespace detail { +class appender; + +FMT_BEGIN_DETAIL_NAMESPACE // Extracts a reference to the container from back_insert_iterator. 
template -inline Container& get_container(std::back_insert_iterator it) { +inline auto get_container(std::back_insert_iterator it) + -> Container& { using bi_iterator = std::back_insert_iterator; struct accessor : bi_iterator { accessor(bi_iterator iter) : bi_iterator(iter) {} @@ -664,6 +734,23 @@ inline Container& get_container(std::back_insert_iterator it) { return *accessor(it).container; } +template +FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) + -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template ::value)> +FMT_CONSTEXPR auto copy_str(const Char* begin, const Char* end, Char* out) + -> Char* { + if (is_constant_evaluated()) + return copy_str(begin, end, out); + auto size = to_unsigned(end - begin); + memcpy(out, begin, size); + return out + size; +} + /** \rst A contiguous memory buffer with an optional growing ability. It is an internal @@ -678,7 +765,7 @@ template class buffer { protected: // Don't initialize ptr_ since it is not accessed to save a few cycles. - FMT_SUPPRESS_MSC_WARNING(26495) + FMT_MSC_WARNING(suppress : 26495) buffer(size_t sz) FMT_NOEXCEPT : size_(sz), capacity_(sz) {} buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) FMT_NOEXCEPT @@ -687,6 +774,7 @@ template class buffer { capacity_(cap) {} ~buffer() = default; + buffer(buffer&&) = default; /** Sets the buffer data and capacity. 
*/ void set(T* buf_data, size_t buf_capacity) FMT_NOEXCEPT { @@ -704,23 +792,23 @@ template class buffer { buffer(const buffer&) = delete; void operator=(const buffer&) = delete; - T* begin() FMT_NOEXCEPT { return ptr_; } - T* end() FMT_NOEXCEPT { return ptr_ + size_; } + auto begin() FMT_NOEXCEPT -> T* { return ptr_; } + auto end() FMT_NOEXCEPT -> T* { return ptr_ + size_; } - const T* begin() const FMT_NOEXCEPT { return ptr_; } - const T* end() const FMT_NOEXCEPT { return ptr_ + size_; } + auto begin() const FMT_NOEXCEPT -> const T* { return ptr_; } + auto end() const FMT_NOEXCEPT -> const T* { return ptr_ + size_; } /** Returns the size of this buffer. */ - size_t size() const FMT_NOEXCEPT { return size_; } + auto size() const FMT_NOEXCEPT -> size_t { return size_; } /** Returns the capacity of this buffer. */ - size_t capacity() const FMT_NOEXCEPT { return capacity_; } + auto capacity() const FMT_NOEXCEPT -> size_t { return capacity_; } /** Returns a pointer to the buffer data. */ - T* data() FMT_NOEXCEPT { return ptr_; } + auto data() FMT_NOEXCEPT -> T* { return ptr_; } /** Returns a pointer to the buffer data. */ - const T* data() const FMT_NOEXCEPT { return ptr_; } + auto data() const FMT_NOEXCEPT -> const T* { return ptr_; } /** Clears this buffer. */ void clear() { size_ = 0; } @@ -748,16 +836,16 @@ template class buffer { /** Appends data to the end of the buffer. 
*/ template void append(const U* begin, const U* end); - template T& operator[](I index) { return ptr_[index]; } - template const T& operator[](I index) const { + template auto operator[](I index) -> T& { return ptr_[index]; } + template auto operator[](I index) const -> const T& { return ptr_[index]; } }; struct buffer_traits { explicit buffer_traits(size_t) {} - size_t count() const { return 0; } - size_t limit(size_t size) { return size; } + auto count() const -> size_t { return 0; } + auto limit(size_t size) -> size_t { return size; } }; class fixed_buffer_traits { @@ -767,8 +855,8 @@ class fixed_buffer_traits { public: explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} - size_t count() const { return count_; } - size_t limit(size_t size) { + auto count() const -> size_t { return count_; } + auto limit(size_t size) -> size_t { size_t n = limit_ > count_ ? limit_ - count_ : 0; count_ += size; return size < n ? size : n; @@ -787,20 +875,25 @@ class iterator_buffer final : public Traits, public buffer { void grow(size_t) final FMT_OVERRIDE { if (this->size() == buffer_size) flush(); } - void flush(); + + void flush() { + auto size = this->size(); + this->clear(); + out_ = copy_str(data_, data_ + this->limit(size), out_); + } public: explicit iterator_buffer(OutputIt out, size_t n = buffer_size) - : Traits(n), - buffer(data_, 0, buffer_size), - out_(out) {} + : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} + iterator_buffer(iterator_buffer&& other) + : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} ~iterator_buffer() { flush(); } - OutputIt out() { + auto out() -> OutputIt { flush(); return out_; } - size_t count() const { return Traits::count() + this->size(); } + auto count() const -> size_t { return Traits::count() + this->size(); } }; template class iterator_buffer final : public buffer { @@ -810,7 +903,7 @@ template class iterator_buffer final : public buffer { public: explicit iterator_buffer(T* out, size_t = 0) : 
buffer(out, 0, ~size_t()) {} - T* out() { return &*this->end(); } + auto out() -> T* { return &*this->end(); } }; // A buffer that writes to a container with the contiguous storage. @@ -833,7 +926,7 @@ class iterator_buffer, : buffer(c.size()), container_(c) {} explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) : iterator_buffer(get_container(out)) {} - std::back_insert_iterator out() { + auto out() -> std::back_insert_iterator { return std::back_inserter(container_); } }; @@ -855,48 +948,24 @@ template class counting_buffer final : public buffer { public: counting_buffer() : buffer(data_, 0, buffer_size) {} - size_t count() { return count_ + this->size(); } + auto count() -> size_t { return count_ + this->size(); } }; -// An output iterator that appends to the buffer. -// It is used to reduce symbol sizes for the common case. template -class buffer_appender : public std::back_insert_iterator> { - using base = std::back_insert_iterator>; +using buffer_appender = conditional_t::value, appender, + std::back_insert_iterator>>; - public: - explicit buffer_appender(buffer& buf) : base(buf) {} - buffer_appender(base it) : base(it) {} - - buffer_appender& operator++() { - base::operator++(); - return *this; - } - - buffer_appender operator++(int) { - buffer_appender tmp = *this; - ++*this; - return tmp; - } -}; - -// Maps an output iterator into a buffer. +// Maps an output iterator to a buffer. 
template -iterator_buffer get_buffer(OutputIt); -template buffer& get_buffer(buffer_appender); - -template OutputIt get_buffer_init(OutputIt out) { - return out; -} -template buffer& get_buffer_init(buffer_appender out) { - return get_container(out); +auto get_buffer(OutputIt out) -> iterator_buffer { + return iterator_buffer(out); } template auto get_iterator(Buffer& buf) -> decltype(buf.out()) { return buf.out(); } -template buffer_appender get_iterator(buffer& buf) { +template auto get_iterator(buffer& buf) -> buffer_appender { return buffer_appender(buf); } @@ -906,9 +975,9 @@ struct fallback_formatter { }; // Specifies if T has an enabled fallback_formatter specialization. -template +template using has_fallback_formatter = - std::is_constructible>; + std::is_constructible>; struct view {}; @@ -933,8 +1002,8 @@ struct arg_data { template arg_data(const U&... init) : args_{T(named_args_, NUM_NAMED_ARGS), init...} {} arg_data(const arg_data& other) = delete; - const T* args() const { return args_ + 1; } - named_arg_info* named_args() { return named_args_; } + auto args() const -> const T* { return args_ + 1; } + auto named_args() -> named_arg_info* { return named_args_; } }; template @@ -943,42 +1012,47 @@ struct arg_data { T args_[NUM_ARGS != 0 ? NUM_ARGS : +1]; template - FMT_INLINE arg_data(const U&... init) : args_{init...} {} - FMT_INLINE const T* args() const { return args_; } - FMT_INLINE std::nullptr_t named_args() { return nullptr; } + FMT_CONSTEXPR FMT_INLINE arg_data(const U&... 
init) : args_{init...} {} + FMT_CONSTEXPR FMT_INLINE auto args() const -> const T* { return args_; } + FMT_CONSTEXPR FMT_INLINE auto named_args() -> std::nullptr_t { + return nullptr; + } }; template inline void init_named_args(named_arg_info*, int, int) {} -template +template struct is_named_arg : std::false_type {}; +template struct is_statically_named_arg : std::false_type {}; + +template +struct is_named_arg> : std::true_type {}; + +template ::value)> void init_named_args(named_arg_info* named_args, int arg_count, int named_arg_count, const T&, const Tail&... args) { init_named_args(named_args, arg_count + 1, named_arg_count, args...); } -template +template ::value)> void init_named_args(named_arg_info* named_args, int arg_count, - int named_arg_count, const named_arg& arg, - const Tail&... args) { + int named_arg_count, const T& arg, const Tail&... args) { named_args[named_arg_count++] = {arg.name, arg_count}; init_named_args(named_args, arg_count + 1, named_arg_count, args...); } template -FMT_INLINE void init_named_args(std::nullptr_t, int, int, const Args&...) {} +FMT_CONSTEXPR FMT_INLINE void init_named_args(std::nullptr_t, int, int, + const Args&...) {} -template struct is_named_arg : std::false_type {}; - -template -struct is_named_arg> : std::true_type {}; - -template constexpr size_t count() { return B ? 1 : 0; } -template constexpr size_t count() { +template constexpr auto count() -> size_t { return B ? 1 : 0; } +template constexpr auto count() -> size_t { return (B1 ? 
1 : 0) + count(); } -template constexpr size_t count_named_args() { +template constexpr auto count_named_args() -> size_t { return count::value...>(); } @@ -1059,6 +1133,7 @@ template class value { using char_type = typename Context::char_type; union { + monostate no_value; int int_value; unsigned uint_value; long long long_long_value; @@ -1076,19 +1151,23 @@ template class value { named_arg_value named_args; }; - constexpr FMT_INLINE value(int val = 0) : int_value(val) {} + constexpr FMT_INLINE value() : no_value() {} + constexpr FMT_INLINE value(int val) : int_value(val) {} constexpr FMT_INLINE value(unsigned val) : uint_value(val) {} - FMT_INLINE value(long long val) : long_long_value(val) {} - FMT_INLINE value(unsigned long long val) : ulong_long_value(val) {} + constexpr FMT_INLINE value(long long val) : long_long_value(val) {} + constexpr FMT_INLINE value(unsigned long long val) : ulong_long_value(val) {} FMT_INLINE value(int128_t val) : int128_value(val) {} FMT_INLINE value(uint128_t val) : uint128_value(val) {} FMT_INLINE value(float val) : float_value(val) {} FMT_INLINE value(double val) : double_value(val) {} FMT_INLINE value(long double val) : long_double_value(val) {} - FMT_INLINE value(bool val) : bool_value(val) {} - FMT_INLINE value(char_type val) : char_value(val) {} - FMT_INLINE value(const char_type* val) { string.data = val; } - FMT_INLINE value(basic_string_view val) { + constexpr FMT_INLINE value(bool val) : bool_value(val) {} + constexpr FMT_INLINE value(char_type val) : char_value(val) {} + FMT_CONSTEXPR FMT_INLINE value(const char_type* val) { + string.data = val; + if (is_constant_evaluated()) string.size = {}; + } + FMT_CONSTEXPR FMT_INLINE value(basic_string_view val) { string.data = val.data(); string.size = val.size(); } @@ -1096,7 +1175,7 @@ template class value { FMT_INLINE value(const named_arg_info* args, size_t size) : named_args{args, size} {} - template FMT_INLINE value(const T& val) { + template FMT_CONSTEXPR FMT_INLINE 
value(const T& val) { custom.value = &val; // Get the formatter type through the context to allow different contexts // have different extension points, e.g. `formatter` for `format` and @@ -1120,7 +1199,7 @@ template class value { }; template -FMT_CONSTEXPR basic_format_arg make_arg(const T& value); +FMT_CONSTEXPR auto make_arg(const T& value) -> basic_format_arg; // To minimize the number of types we need to deal with, long is translated // either to int or to long long depending on its size. @@ -1134,36 +1213,52 @@ struct unformattable {}; template struct arg_mapper { using char_type = typename Context::char_type; - FMT_CONSTEXPR int map(signed char val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned char val) { return val; } - FMT_CONSTEXPR int map(short val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned short val) { return val; } - FMT_CONSTEXPR int map(int val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned val) { return val; } - FMT_CONSTEXPR long_type map(long val) { return val; } - FMT_CONSTEXPR ulong_type map(unsigned long val) { return val; } - FMT_CONSTEXPR long long map(long long val) { return val; } - FMT_CONSTEXPR unsigned long long map(unsigned long long val) { return val; } - FMT_CONSTEXPR int128_t map(int128_t val) { return val; } - FMT_CONSTEXPR uint128_t map(uint128_t val) { return val; } - FMT_CONSTEXPR bool map(bool val) { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(signed char val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned char val) -> unsigned { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(short val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned short val) -> unsigned { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(int val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned val) -> unsigned { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(long val) -> long_type { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned long val) 
-> ulong_type { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(long long val) -> long long { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned long long val) + -> unsigned long long { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(int128_t val) -> int128_t { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(uint128_t val) -> uint128_t { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(bool val) -> bool { return val; } template ::value)> - FMT_CONSTEXPR char_type map(T val) { + FMT_CONSTEXPR FMT_INLINE auto map(T val) -> char_type { static_assert( std::is_same::value || std::is_same::value, "mixing character types is disallowed"); return val; } - FMT_CONSTEXPR float map(float val) { return val; } - FMT_CONSTEXPR double map(double val) { return val; } - FMT_CONSTEXPR long double map(long double val) { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(float val) -> float { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(double val) -> double { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(long double val) -> long double { + return val; + } - FMT_CONSTEXPR const char_type* map(char_type* val) { return val; } - FMT_CONSTEXPR const char_type* map(const char_type* val) { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(char_type* val) -> const char_type* { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(const char_type* val) -> const char_type* { + return val; + } template ::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> basic_string_view { static_assert(std::is_same>::value, "mixing character types is disallowed"); return to_string_view(val); @@ -1172,8 +1267,9 @@ template struct arg_mapper { FMT_ENABLE_IF( std::is_constructible, T>::value && !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { + !has_fallback_formatter::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> 
basic_string_view { return basic_string_view(val); } template < @@ -1182,31 +1278,40 @@ template struct arg_mapper { std::is_constructible, T>::value && !std::is_constructible, T>::value && !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { + !has_fallback_formatter::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> basic_string_view { return std_string_view(val); } - FMT_CONSTEXPR const char* map(const signed char* val) { + FMT_CONSTEXPR FMT_INLINE auto map(const signed char* val) -> const char* { static_assert(std::is_same::value, "invalid string type"); return reinterpret_cast(val); } - FMT_CONSTEXPR const char* map(const unsigned char* val) { + FMT_CONSTEXPR FMT_INLINE auto map(const unsigned char* val) -> const char* { static_assert(std::is_same::value, "invalid string type"); return reinterpret_cast(val); } - FMT_CONSTEXPR const char* map(signed char* val) { + FMT_CONSTEXPR FMT_INLINE auto map(signed char* val) -> const char* { const auto* const_val = val; return map(const_val); } - FMT_CONSTEXPR const char* map(unsigned char* val) { + FMT_CONSTEXPR FMT_INLINE auto map(unsigned char* val) -> const char* { const auto* const_val = val; return map(const_val); } - FMT_CONSTEXPR const void* map(void* val) { return val; } - FMT_CONSTEXPR const void* map(const void* val) { return val; } - FMT_CONSTEXPR const void* map(std::nullptr_t val) { return val; } - template FMT_CONSTEXPR int map(const T*) { + FMT_CONSTEXPR FMT_INLINE auto map(void* val) -> const void* { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(const void* val) -> const void* { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(std::nullptr_t val) -> const void* { + return val; + } + + // We use SFINAE instead of a const T* parameter to avoid conflicting with + // the C array overload. + template + FMT_CONSTEXPR auto map(T) -> enable_if_t::value, int> { // Formatting of arbitrary pointers is disallowed. 
If you want to output // a pointer cast it to "void *" or "const void *". In particular, this // forbids formatting of "[const] volatile char *" which is printed as bool @@ -1215,11 +1320,16 @@ template struct arg_mapper { return 0; } + template + FMT_CONSTEXPR FMT_INLINE auto map(const T (&values)[N]) -> const T (&)[N] { + return values; + } + template ::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR auto map(const T& val) + !has_fallback_formatter::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) -> decltype(std::declval().map( static_cast::type>(val))) { return map(static_cast::type>(val)); @@ -1227,18 +1337,18 @@ template struct arg_mapper { template ::value && !is_char::value && (has_formatter::value || - has_fallback_formatter::value))> - FMT_CONSTEXPR const T& map(const T& val) { + has_fallback_formatter::value))> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) -> const T& { return val; } - template - FMT_CONSTEXPR auto map(const named_arg& val) - -> decltype(std::declval().map(val.value)) { - return map(val.value); + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) + -> decltype(std::declval().map(named_arg.value)) { + return map(named_arg.value); } - unformattable map(...) { return {}; } + auto map(...) -> unformattable { return {}; } }; // A type constant after applying arg_mapper. @@ -1252,7 +1362,35 @@ enum { packed_arg_bits = 4 }; enum { max_packed_args = 62 / packed_arg_bits }; enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; -} // namespace detail + +FMT_END_DETAIL_NAMESPACE + +// An output iterator that appends to a buffer. +// It is used to reduce symbol sizes for the common case. 
+class appender : public std::back_insert_iterator> { + using base = std::back_insert_iterator>; + + template + friend auto get_buffer(appender out) -> detail::buffer& { + return detail::get_container(out); + } + + public: + using std::back_insert_iterator>::back_insert_iterator; + appender(base it) : base(it) {} + using _Unchecked_type = appender; // Mark iterator as checked. + + auto operator++() -> appender& { + base::operator++(); + return *this; + } + + auto operator++(int) -> appender { + auto tmp = *this; + ++*this; + return tmp; + } +}; // A formatting argument. It is a trivially copyable/constructible type to // allow storage in basic_memory_buffer. @@ -1262,8 +1400,8 @@ template class basic_format_arg { detail::type type_; template - friend FMT_CONSTEXPR basic_format_arg detail::make_arg( - const T& value); + friend FMT_CONSTEXPR auto detail::make_arg(const T& value) + -> basic_format_arg; template friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, @@ -1301,10 +1439,12 @@ template class basic_format_arg { return type_ != detail::type::none_type; } - detail::type type() const { return type_; } + auto type() const -> detail::type { return type_; } - bool is_integral() const { return detail::is_integral_type(type_); } - bool is_arithmetic() const { return detail::is_arithmetic_type(type_); } + auto is_integral() const -> bool { return detail::is_integral_type(type_); } + auto is_arithmetic() const -> bool { + return detail::is_arithmetic_type(type_); + } }; /** @@ -1315,9 +1455,8 @@ template class basic_format_arg { \endrst */ template -FMT_CONSTEXPR_DECL FMT_INLINE auto visit_format_arg( +FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { - using char_type = typename Context::char_type; switch (arg.type_) { case detail::type::none_type: break; @@ -1329,16 +1468,10 @@ FMT_CONSTEXPR_DECL FMT_INLINE auto visit_format_arg( return vis(arg.value_.long_long_value); case 
detail::type::ulong_long_type: return vis(arg.value_.ulong_long_value); -#if FMT_USE_INT128 case detail::type::int128_type: - return vis(arg.value_.int128_value); + return vis(detail::convert_for_visit(arg.value_.int128_value)); case detail::type::uint128_type: - return vis(arg.value_.uint128_value); -#else - case detail::type::int128_type: - case detail::type::uint128_type: - break; -#endif + return vis(detail::convert_for_visit(arg.value_.uint128_value)); case detail::type::bool_type: return vis(arg.value_.bool_value); case detail::type::char_type: @@ -1352,8 +1485,8 @@ FMT_CONSTEXPR_DECL FMT_INLINE auto visit_format_arg( case detail::type::cstring_type: return vis(arg.value_.string.data); case detail::type::string_type: - return vis(basic_string_view(arg.value_.string.data, - arg.value_.string.size)); + using sv = basic_string_view; + return vis(sv(arg.value_.string.data, arg.value_.string.size)); case detail::type::pointer_type: return vis(arg.value_.pointer); case detail::type::custom_type: @@ -1362,14 +1495,22 @@ FMT_CONSTEXPR_DECL FMT_INLINE auto visit_format_arg( return vis(monostate()); } -template struct formattable : std::false_type {}; +FMT_BEGIN_DETAIL_NAMESPACE -namespace detail { +template +auto copy_str(InputIt begin, InputIt end, appender out) -> appender { + get_container(out).append(begin, end); + return out; +} +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 // A workaround for gcc 4.8 to make void_t work in a SFINAE context. 
template struct void_t_impl { using type = void; }; template using void_t = typename detail::void_t_impl::type; +#else +template using void_t = void; +#endif template struct is_output_iterator : std::false_type {}; @@ -1392,9 +1533,8 @@ struct is_contiguous_back_insert_iterator : std::false_type {}; template struct is_contiguous_back_insert_iterator> : is_contiguous {}; -template -struct is_contiguous_back_insert_iterator> - : std::true_type {}; +template <> +struct is_contiguous_back_insert_iterator : std::true_type {}; // A type-erased reference to an std::locale to avoid heavy include. class locale_ref { @@ -1402,97 +1542,52 @@ class locale_ref { const void* locale_; // A type-erased pointer to std::locale. public: - locale_ref() : locale_(nullptr) {} + constexpr locale_ref() : locale_(nullptr) {} template explicit locale_ref(const Locale& loc); explicit operator bool() const FMT_NOEXCEPT { return locale_ != nullptr; } - template Locale get() const; + template auto get() const -> Locale; }; -template constexpr unsigned long long encode_types() { return 0; } +template constexpr auto encode_types() -> unsigned long long { + return 0; +} template -constexpr unsigned long long encode_types() { +constexpr auto encode_types() -> unsigned long long { return static_cast(mapped_type_constant::value) | (encode_types() << packed_arg_bits); } template -FMT_CONSTEXPR basic_format_arg make_arg(const T& value) { +FMT_CONSTEXPR auto make_arg(const T& value) -> basic_format_arg { basic_format_arg arg; arg.type_ = mapped_type_constant::value; arg.value_ = arg_mapper().map(value); return arg; } -template int check(unformattable) { - static_assert( - formattable(), - "Cannot format an argument. 
To make type T formattable provide a " - "formatter specialization: https://fmt.dev/latest/api.html#udt"); - return 0; -} -template inline const U& check(const U& val) { - return val; -} - // The type template parameter is there to avoid an ODR violation when using // a fallback formatter in one translation unit and an implicit conversion in // another (not recommended). template -inline value make_arg(const T& val) { - return check(arg_mapper().map(val)); +FMT_CONSTEXPR FMT_INLINE auto make_arg(const T& val) -> value { + const auto& arg = arg_mapper().map(val); + static_assert( + !std::is_same::value, + "Cannot format an argument. To make type T formattable provide a " + "formatter specialization: https://fmt.dev/latest/api.html#udt"); + return {arg}; } template -inline basic_format_arg make_arg(const T& value) { +inline auto make_arg(const T& value) -> basic_format_arg { return make_arg(value); } - -template struct is_reference_wrapper : std::false_type {}; -template -struct is_reference_wrapper> : std::true_type {}; - -template const T& unwrap(const T& v) { return v; } -template const T& unwrap(const std::reference_wrapper& v) { - return static_cast(v); -} - -class dynamic_arg_list { - // Workaround for clang's -Wweak-vtables. Unlike for regular classes, for - // templates it doesn't complain about inability to deduce single translation - // unit for placing vtable. So storage_node_base is made a fake template. 
- template struct node { - virtual ~node() = default; - std::unique_ptr> next; - }; - - template struct typed_node : node<> { - T value; - - template - FMT_CONSTEXPR typed_node(const Arg& arg) : value(arg) {} - - template - FMT_CONSTEXPR typed_node(const basic_string_view& arg) - : value(arg.data(), arg.size()) {} - }; - - std::unique_ptr> head_; - - public: - template const T& push(const Arg& arg) { - auto new_node = std::unique_ptr>(new typed_node(arg)); - auto& value = new_node->value; - new_node->next = std::move(head_); - head_ = std::move(new_node); - return value; - } -}; -} // namespace detail +FMT_END_DETAIL_NAMESPACE // Formatting context. template class basic_format_context { @@ -1511,46 +1606,59 @@ template class basic_format_context { using parse_context_type = basic_format_parse_context; template using formatter_type = formatter; + basic_format_context(basic_format_context&&) = default; basic_format_context(const basic_format_context&) = delete; void operator=(const basic_format_context&) = delete; /** Constructs a ``basic_format_context`` object. References to the arguments are stored in the object so make sure they have appropriate lifetimes. 
*/ - basic_format_context(OutputIt out, - basic_format_args ctx_args, - detail::locale_ref loc = detail::locale_ref()) + constexpr basic_format_context( + OutputIt out, basic_format_args ctx_args, + detail::locale_ref loc = detail::locale_ref()) : out_(out), args_(ctx_args), loc_(loc) {} - format_arg arg(int id) const { return args_.get(id); } - format_arg arg(basic_string_view name) { return args_.get(name); } - int arg_id(basic_string_view name) { return args_.get_id(name); } - const basic_format_args& args() const { return args_; } + constexpr auto arg(int id) const -> format_arg { return args_.get(id); } + FMT_CONSTEXPR auto arg(basic_string_view name) -> format_arg { + return args_.get(name); + } + FMT_CONSTEXPR auto arg_id(basic_string_view name) -> int { + return args_.get_id(name); + } + auto args() const -> const basic_format_args& { + return args_; + } - detail::error_handler error_handler() { return {}; } + FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; } void on_error(const char* message) { error_handler().on_error(message); } // Returns an iterator to the beginning of the output range. - iterator out() { return out_; } + FMT_CONSTEXPR auto out() -> iterator { return out_; } // Advances the begin iterator to ``it``. void advance_to(iterator it) { if (!detail::is_back_insert_iterator()) out_ = it; } - detail::locale_ref locale() { return loc_; } + FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } }; template using buffer_context = basic_format_context, Char>; using format_context = buffer_context; -using wformat_context = buffer_context; // Workaround an alias issue: https://stackoverflow.com/q/62767544/471164. #define FMT_BUFFER_CONTEXT(Char) \ basic_format_context, Char> +template +using is_formattable = bool_constant< + !std::is_same>().map( + std::declval())), + detail::unformattable>::value && + !detail::has_fallback_formatter::value>; + /** \rst An array of references to arguments. 
It can be implicitly converted into @@ -1587,7 +1695,7 @@ class format_arg_store : 0); public: - format_arg_store(const Args&... args) + FMT_CONSTEXPR FMT_INLINE format_arg_store(const Args&... args) : #if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 basic_format_args(*this), @@ -1608,36 +1716,16 @@ class format_arg_store \endrst */ template -inline format_arg_store make_format_args( - const Args&... args) { +constexpr auto make_format_args(const Args&... args) + -> format_arg_store { return {args...}; } /** \rst - Constructs a `~fmt::format_arg_store` object that contains references - to arguments and can be implicitly converted to `~fmt::format_args`. - If ``format_str`` is a compile-time string then `make_args_checked` checks - its validity at compile time. - \endrst - */ -template > -inline auto make_args_checked(const S& format_str, - const remove_reference_t&... args) - -> format_arg_store, remove_reference_t...> { - static_assert( - detail::count<( - std::is_base_of>::value && - std::is_reference::value)...>() == 0, - "passing views as lvalues is disallowed"); - detail::check_format_string(format_str); - return {args...}; -} - -/** - \rst - Returns a named argument to be used in a formatting function. It should only - be used in a call to a formatting function. + Returns a named argument to be used in a formatting function. + It should only be used in a call to a formatting function or + `dynamic_format_arg_store::push_back`. **Example**:: @@ -1645,184 +1733,11 @@ inline auto make_args_checked(const S& format_str, \endrst */ template -inline detail::named_arg arg(const Char* name, const T& arg) { +inline auto arg(const Char* name, const T& arg) -> detail::named_arg { static_assert(!detail::is_named_arg(), "nested named arguments"); return {name, arg}; } -/** - \rst - A dynamic version of `fmt::format_arg_store`. - It's equipped with a storage to potentially temporary objects which lifetimes - could be shorter than the format arguments object. 
- - It can be implicitly converted into `~fmt::basic_format_args` for passing - into type-erased formatting functions such as `~fmt::vformat`. - \endrst - */ -template -class dynamic_format_arg_store -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - // Workaround a GCC template argument substitution bug. - : public basic_format_args -#endif -{ - private: - using char_type = typename Context::char_type; - - template struct need_copy { - static constexpr detail::type mapped_type = - detail::mapped_type_constant::value; - - enum { - value = !(detail::is_reference_wrapper::value || - std::is_same>::value || - std::is_same>::value || - (mapped_type != detail::type::cstring_type && - mapped_type != detail::type::string_type && - mapped_type != detail::type::custom_type)) - }; - }; - - template - using stored_type = conditional_t::value, - std::basic_string, T>; - - // Storage of basic_format_arg must be contiguous. - std::vector> data_; - std::vector> named_info_; - - // Storage of arguments not fitting into basic_format_arg must grow - // without relocation because items in data_ refer to it. - detail::dynamic_arg_list dynamic_args_; - - friend class basic_format_args; - - unsigned long long get_types() const { - return detail::is_unpacked_bit | data_.size() | - (named_info_.empty() - ? 0ULL - : static_cast(detail::has_named_args_bit)); - } - - const basic_format_arg* data() const { - return named_info_.empty() ? 
data_.data() : data_.data() + 1; - } - - template void emplace_arg(const T& arg) { - data_.emplace_back(detail::make_arg(arg)); - } - - template - void emplace_arg(const detail::named_arg& arg) { - if (named_info_.empty()) { - constexpr const detail::named_arg_info* zero_ptr{nullptr}; - data_.insert(data_.begin(), {zero_ptr, 0}); - } - data_.emplace_back(detail::make_arg(detail::unwrap(arg.value))); - auto pop_one = [](std::vector>* data) { - data->pop_back(); - }; - std::unique_ptr>, decltype(pop_one)> - guard{&data_, pop_one}; - named_info_.push_back({arg.name, static_cast(data_.size() - 2u)}); - data_[0].value_.named_args = {named_info_.data(), named_info_.size()}; - guard.release(); - } - - public: - /** - \rst - Adds an argument into the dynamic store for later passing to a formatting - function. - - Note that custom types and string types (but not string views) are copied - into the store dynamically allocating memory if necessary. - - **Example**:: - - fmt::dynamic_format_arg_store store; - store.push_back(42); - store.push_back("abc"); - store.push_back(1.5f); - std::string result = fmt::vformat("{} and {} and {}", store); - \endrst - */ - template void push_back(const T& arg) { - if (detail::const_check(need_copy::value)) - emplace_arg(dynamic_args_.push>(arg)); - else - emplace_arg(detail::unwrap(arg)); - } - - /** - \rst - Adds a reference to the argument into the dynamic store for later passing to - a formatting function. Supports named arguments wrapped in - ``std::reference_wrapper`` via ``std::ref()``/``std::cref()``. - - **Example**:: - - fmt::dynamic_format_arg_store store; - char str[] = "1234567890"; - store.push_back(std::cref(str)); - int a1_val{42}; - auto a1 = fmt::arg("a1_", a1_val); - store.push_back(std::cref(a1)); - - // Changing str affects the output but only for string and custom types. 
- str[0] = 'X'; - - std::string result = fmt::vformat("{} and {a1_}"); - assert(result == "X234567890 and 42"); - \endrst - */ - template void push_back(std::reference_wrapper arg) { - static_assert( - detail::is_named_arg::type>::value || - need_copy::value, - "objects of built-in types and string views are always copied"); - emplace_arg(arg.get()); - } - - /** - Adds named argument into the dynamic store for later passing to a formatting - function. ``std::reference_wrapper`` is supported to avoid copying of the - argument. - */ - template - void push_back(const detail::named_arg& arg) { - const char_type* arg_name = - dynamic_args_.push>(arg.name).c_str(); - if (detail::const_check(need_copy::value)) { - emplace_arg( - fmt::arg(arg_name, dynamic_args_.push>(arg.value))); - } else { - emplace_arg(fmt::arg(arg_name, arg.value)); - } - } - - /** Erase all elements from the store */ - void clear() { - data_.clear(); - named_info_.clear(); - dynamic_args_ = detail::dynamic_arg_list(); - } - - /** - \rst - Reserves space to store at least *new_cap* arguments including - *new_cap_named* named arguments. - \endrst - */ - void reserve(size_t new_cap, size_t new_cap_named) { - FMT_ASSERT(new_cap >= new_cap_named, - "Set of arguments includes set of named arguments"); - data_.reserve(new_cap); - named_info_.reserve(new_cap_named); - } -}; - /** \rst A view of a collection of formatting arguments. 
To avoid lifetime issues it @@ -1854,25 +1769,27 @@ template class basic_format_args { const format_arg* args_; }; - bool is_packed() const { return (desc_ & detail::is_unpacked_bit) == 0; } - bool has_named_args() const { + constexpr auto is_packed() const -> bool { + return (desc_ & detail::is_unpacked_bit) == 0; + } + auto has_named_args() const -> bool { return (desc_ & detail::has_named_args_bit) != 0; } - detail::type type(int index) const { + FMT_CONSTEXPR auto type(int index) const -> detail::type { int shift = index * detail::packed_arg_bits; unsigned int mask = (1 << detail::packed_arg_bits) - 1; return static_cast((desc_ >> shift) & mask); } - basic_format_args(unsigned long long desc, - const detail::value* values) + constexpr FMT_INLINE basic_format_args(unsigned long long desc, + const detail::value* values) : desc_(desc), values_(values) {} - basic_format_args(unsigned long long desc, const format_arg* args) + constexpr basic_format_args(unsigned long long desc, const format_arg* args) : desc_(desc), args_(args) {} public: - basic_format_args() : desc_(0) {} + constexpr basic_format_args() : desc_(0), args_(nullptr) {} /** \rst @@ -1880,8 +1797,10 @@ template class basic_format_args { \endrst */ template - FMT_INLINE basic_format_args(const format_arg_store& store) - : basic_format_args(store.desc, store.data_.args()) {} + constexpr FMT_INLINE basic_format_args( + const format_arg_store& store) + : basic_format_args(format_arg_store::desc, + store.data_.args()) {} /** \rst @@ -1889,7 +1808,8 @@ template class basic_format_args { `~fmt::dynamic_format_arg_store`. \endrst */ - FMT_INLINE basic_format_args(const dynamic_format_arg_store& store) + constexpr FMT_INLINE basic_format_args( + const dynamic_format_arg_store& store) : basic_format_args(store.get_types(), store.data()) {} /** @@ -1897,12 +1817,12 @@ template class basic_format_args { Constructs a `basic_format_args` object from a dynamic set of arguments. 
\endrst */ - basic_format_args(const format_arg* args, int count) + constexpr basic_format_args(const format_arg* args, int count) : basic_format_args(detail::is_unpacked_bit | detail::to_unsigned(count), args) {} /** Returns the argument with the specified id. */ - format_arg get(int id) const { + FMT_CONSTEXPR auto get(int id) const -> format_arg { format_arg arg; if (!is_packed()) { if (id < max_size()) arg = args_[id]; @@ -1915,12 +1835,14 @@ template class basic_format_args { return arg; } - template format_arg get(basic_string_view name) const { + template + auto get(basic_string_view name) const -> format_arg { int id = get_id(name); return id >= 0 ? get(id) : format_arg(); } - template int get_id(basic_string_view name) const { + template + auto get_id(basic_string_view name) const -> int { if (!has_named_args()) return -1; const auto& named_args = (is_packed() ? values_[-1] : args_[-1].value_).named_args; @@ -1930,87 +1852,1071 @@ template class basic_format_args { return -1; } - int max_size() const { + auto max_size() const -> int { unsigned long long max_packed = detail::max_packed_args; return static_cast(is_packed() ? max_packed : desc_ & ~detail::is_unpacked_bit); } }; -#ifdef FMT_ARM_ABI_COMPATIBILITY /** An alias to ``basic_format_args``. */ -// Separate types would result in shorter symbols but break ABI compatibility +// A separate type would result in shorter symbols but break ABI compatibility // between clang and gcc on ARM (#1919). using format_args = basic_format_args; -using wformat_args = basic_format_args; -#else -// DEPRECATED! These are kept for ABI compatibility. -// It is a separate type rather than an alias to make symbols readable. -struct format_args : basic_format_args { - template - FMT_INLINE format_args(const Args&... args) : basic_format_args(args...) {} + +// We cannot use enum classes as bit fields because of a gcc bug +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414. 
+namespace align { +enum type { none, left, right, center, numeric }; +} +using align_t = align::type; +namespace sign { +enum type { none, minus, plus, space }; +} +using sign_t = sign::type; + +FMT_BEGIN_DETAIL_NAMESPACE + +void throw_format_error(const char* message); + +// Workaround an array initialization issue in gcc 4.8. +template struct fill_t { + private: + enum { max_size = 4 }; + Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)}; + unsigned char size_ = 1; + + public: + FMT_CONSTEXPR void operator=(basic_string_view s) { + auto size = s.size(); + if (size > max_size) return throw_format_error("invalid fill"); + for (size_t i = 0; i < size; ++i) data_[i] = s[i]; + size_ = static_cast(size); + } + + constexpr auto size() const -> size_t { return size_; } + constexpr auto data() const -> const Char* { return data_; } + + FMT_CONSTEXPR auto operator[](size_t index) -> Char& { return data_[index]; } + FMT_CONSTEXPR auto operator[](size_t index) const -> const Char& { + return data_[index]; + } }; -struct wformat_args : basic_format_args { - using basic_format_args::basic_format_args; +FMT_END_DETAIL_NAMESPACE + +// Format specifiers for built-in and string types. +template struct basic_format_specs { + int width; + int precision; + char type; + align_t align : 4; + sign_t sign : 3; + bool alt : 1; // Alternate form ('#'). + bool localized : 1; + detail::fill_t fill; + + constexpr basic_format_specs() + : width(0), + precision(-1), + type(0), + align(align::none), + sign(sign::none), + alt(false), + localized(false) {} }; + +using format_specs = basic_format_specs; + +FMT_BEGIN_DETAIL_NAMESPACE + +enum class arg_id_kind { none, index, name }; + +// An argument reference. 
+template struct arg_ref { + FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {} + + FMT_CONSTEXPR explicit arg_ref(int index) + : kind(arg_id_kind::index), val(index) {} + FMT_CONSTEXPR explicit arg_ref(basic_string_view name) + : kind(arg_id_kind::name), val(name) {} + + FMT_CONSTEXPR auto operator=(int idx) -> arg_ref& { + kind = arg_id_kind::index; + val.index = idx; + return *this; + } + + arg_id_kind kind; + union value { + FMT_CONSTEXPR value(int id = 0) : index{id} {} + FMT_CONSTEXPR value(basic_string_view n) : name(n) {} + + int index; + basic_string_view name; + } val; +}; + +// Format specifiers with width and precision resolved at formatting rather +// than parsing time to allow re-using the same parsed specifiers with +// different sets of arguments (precompilation of format strings). +template +struct dynamic_format_specs : basic_format_specs { + arg_ref width_ref; + arg_ref precision_ref; +}; + +struct auto_id {}; + +// A format specifier handler that sets fields in basic_format_specs. 
+template class specs_setter { + protected: + basic_format_specs& specs_; + + public: + explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs) + : specs_(specs) {} + + FMT_CONSTEXPR specs_setter(const specs_setter& other) + : specs_(other.specs_) {} + + FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; } + FMT_CONSTEXPR void on_fill(basic_string_view fill) { + specs_.fill = fill; + } + FMT_CONSTEXPR void on_sign(sign_t s) { specs_.sign = s; } + FMT_CONSTEXPR void on_hash() { specs_.alt = true; } + FMT_CONSTEXPR void on_localized() { specs_.localized = true; } + + FMT_CONSTEXPR void on_zero() { + if (specs_.align == align::none) specs_.align = align::numeric; + specs_.fill[0] = Char('0'); + } + + FMT_CONSTEXPR void on_width(int width) { specs_.width = width; } + FMT_CONSTEXPR void on_precision(int precision) { + specs_.precision = precision; + } + FMT_CONSTEXPR void end_precision() {} + + FMT_CONSTEXPR void on_type(Char type) { + specs_.type = static_cast(type); + } +}; + +// Format spec handler that saves references to arguments representing dynamic +// width and precision to be resolved at formatting time. 
+template +class dynamic_specs_handler + : public specs_setter { + public: + using char_type = typename ParseContext::char_type; + + FMT_CONSTEXPR dynamic_specs_handler(dynamic_format_specs& specs, + ParseContext& ctx) + : specs_setter(specs), specs_(specs), context_(ctx) {} + + FMT_CONSTEXPR dynamic_specs_handler(const dynamic_specs_handler& other) + : specs_setter(other), + specs_(other.specs_), + context_(other.context_) {} + + template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) { + specs_.width_ref = make_arg_ref(arg_id); + } + + template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) { + specs_.precision_ref = make_arg_ref(arg_id); + } + + FMT_CONSTEXPR void on_error(const char* message) { + context_.on_error(message); + } + + private: + dynamic_format_specs& specs_; + ParseContext& context_; + + using arg_ref_type = arg_ref; + + FMT_CONSTEXPR auto make_arg_ref(int arg_id) -> arg_ref_type { + context_.check_arg_id(arg_id); + return arg_ref_type(arg_id); + } + + FMT_CONSTEXPR auto make_arg_ref(auto_id) -> arg_ref_type { + return arg_ref_type(context_.next_arg_id()); + } + + FMT_CONSTEXPR auto make_arg_ref(basic_string_view arg_id) + -> arg_ref_type { + context_.check_arg_id(arg_id); + basic_string_view format_str( + context_.begin(), to_unsigned(context_.end() - context_.begin())); + return arg_ref_type(arg_id); + } +}; + +template constexpr bool is_ascii_letter(Char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +// Converts a character to ASCII. Returns a number > 127 on conversion failure. 
+template ::value)> +constexpr auto to_ascii(Char value) -> Char { + return value; +} +template ::value)> +constexpr auto to_ascii(Char value) -> + typename std::underlying_type::type { + return value; +} + +template +FMT_CONSTEXPR auto code_point_length(const Char* begin) -> int { + if (const_check(sizeof(Char) != 1)) return 1; + constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; + int len = lengths[static_cast(*begin) >> 3]; + + // Compute the pointer to the next character early so that the next + // iteration can start working on the next character. Neither Clang + // nor GCC figure out this reordering on their own. + return len + !len; +} + +// Return the result via the out param to workaround gcc bug 77539. +template +FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr& out) -> bool { + for (out = first; out != last; ++out) { + if (*out == value) return true; + } + return false; +} + +template <> +inline auto find(const char* first, const char* last, char value, + const char*& out) -> bool { + out = static_cast( + std::memchr(first, value, to_unsigned(last - first))); + return out != nullptr; +} + +// Parses the range [begin, end) as an unsigned integer. This function assumes +// that the range is non-empty and the first character is a digit. +template +FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, + int error_value) noexcept -> int { + FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); + unsigned value = 0, prev = 0; + auto p = begin; + do { + prev = value; + value = value * 10 + unsigned(*p - '0'); + ++p; + } while (p != end && '0' <= *p && *p <= '9'); + auto num_digits = p - begin; + begin = p; + if (num_digits <= std::numeric_limits::digits10) + return static_cast(value); + // Check for overflow. 
+ const unsigned max = to_unsigned((std::numeric_limits::max)()); + return num_digits == std::numeric_limits::digits10 + 1 && + prev * 10ull + unsigned(p[-1] - '0') <= max + ? static_cast(value) + : error_value; +} + +// Parses fill and alignment. +template +FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + FMT_ASSERT(begin != end, ""); + auto align = align::none; + auto p = begin + code_point_length(begin); + if (p >= end) p = begin; + for (;;) { + switch (to_ascii(*p)) { + case '<': + align = align::left; + break; + case '>': + align = align::right; + break; + case '^': + align = align::center; + break; + default: + break; + } + if (align != align::none) { + if (p != begin) { + auto c = *begin; + if (c == '{') + return handler.on_error("invalid fill character '{'"), begin; + handler.on_fill(basic_string_view(begin, to_unsigned(p - begin))); + begin = p + 1; + } else + ++begin; + handler.on_align(align); + break; + } else if (p == begin) { + break; + } + p = begin; + } + return begin; +} + +template FMT_CONSTEXPR bool is_name_start(Char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; +} + +template +FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, + IDHandler&& handler) -> const Char* { + FMT_ASSERT(begin != end, ""); + Char c = *begin; + if (c >= '0' && c <= '9') { + int index = 0; + if (c != '0') + index = + parse_nonnegative_int(begin, end, (std::numeric_limits::max)()); + else + ++begin; + if (begin == end || (*begin != '}' && *begin != ':')) + handler.on_error("invalid format string"); + else + handler(index); + return begin; + } + if (!is_name_start(c)) { + handler.on_error("invalid format string"); + return begin; + } + auto it = begin; + do { + ++it; + } while (it != end && (is_name_start(c = *it) || ('0' <= c && c <= '9'))); + handler(basic_string_view(begin, to_unsigned(it - begin))); + return it; +} + +template +FMT_CONSTEXPR FMT_INLINE auto 
parse_arg_id(const Char* begin, const Char* end, + IDHandler&& handler) -> const Char* { + Char c = *begin; + if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler); + handler(); + return begin; +} + +template +FMT_CONSTEXPR auto parse_width(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + using detail::auto_id; + struct width_adapter { + Handler& handler; + + FMT_CONSTEXPR void operator()() { handler.on_dynamic_width(auto_id()); } + FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_width(id); } + FMT_CONSTEXPR void operator()(basic_string_view id) { + handler.on_dynamic_width(id); + } + FMT_CONSTEXPR void on_error(const char* message) { + if (message) handler.on_error(message); + } + }; + + FMT_ASSERT(begin != end, ""); + if ('0' <= *begin && *begin <= '9') { + int width = parse_nonnegative_int(begin, end, -1); + if (width != -1) + handler.on_width(width); + else + handler.on_error("number is too big"); + } else if (*begin == '{') { + ++begin; + if (begin != end) begin = parse_arg_id(begin, end, width_adapter{handler}); + if (begin == end || *begin != '}') + return handler.on_error("invalid format string"), begin; + ++begin; + } + return begin; +} + +template +FMT_CONSTEXPR auto parse_precision(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + using detail::auto_id; + struct precision_adapter { + Handler& handler; + + FMT_CONSTEXPR void operator()() { handler.on_dynamic_precision(auto_id()); } + FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_precision(id); } + FMT_CONSTEXPR void operator()(basic_string_view id) { + handler.on_dynamic_precision(id); + } + FMT_CONSTEXPR void on_error(const char* message) { + if (message) handler.on_error(message); + } + }; + + ++begin; + auto c = begin != end ? 
*begin : Char(); + if ('0' <= c && c <= '9') { + auto precision = parse_nonnegative_int(begin, end, -1); + if (precision != -1) + handler.on_precision(precision); + else + handler.on_error("number is too big"); + } else if (c == '{') { + ++begin; + if (begin != end) + begin = parse_arg_id(begin, end, precision_adapter{handler}); + if (begin == end || *begin++ != '}') + return handler.on_error("invalid format string"), begin; + } else { + return handler.on_error("missing precision specifier"), begin; + } + handler.end_precision(); + return begin; +} + +// Parses standard format specifiers and sends notifications about parsed +// components to handler. +template +FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(const Char* begin, + const Char* end, + SpecHandler&& handler) + -> const Char* { + if (begin + 1 < end && begin[1] == '}' && is_ascii_letter(*begin) && + *begin != 'L') { + handler.on_type(*begin++); + return begin; + } + + if (begin == end) return begin; + + begin = parse_align(begin, end, handler); + if (begin == end) return begin; + + // Parse sign. + switch (to_ascii(*begin)) { + case '+': + handler.on_sign(sign::plus); + ++begin; + break; + case '-': + handler.on_sign(sign::minus); + ++begin; + break; + case ' ': + handler.on_sign(sign::space); + ++begin; + break; + default: + break; + } + if (begin == end) return begin; + + if (*begin == '#') { + handler.on_hash(); + if (++begin == end) return begin; + } + + // Parse zero flag. + if (*begin == '0') { + handler.on_zero(); + if (++begin == end) return begin; + } + + begin = parse_width(begin, end, handler); + if (begin == end) return begin; + + // Parse precision. + if (*begin == '.') { + begin = parse_precision(begin, end, handler); + if (begin == end) return begin; + } + + if (*begin == 'L') { + handler.on_localized(); + ++begin; + } + + // Parse type. 
+ if (begin != end && *begin != '}') handler.on_type(*begin++); + return begin; +} + +template +FMT_CONSTEXPR auto parse_replacement_field(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + struct id_adapter { + Handler& handler; + int arg_id; + + FMT_CONSTEXPR void operator()() { arg_id = handler.on_arg_id(); } + FMT_CONSTEXPR void operator()(int id) { arg_id = handler.on_arg_id(id); } + FMT_CONSTEXPR void operator()(basic_string_view id) { + arg_id = handler.on_arg_id(id); + } + FMT_CONSTEXPR void on_error(const char* message) { + if (message) handler.on_error(message); + } + }; + + ++begin; + if (begin == end) return handler.on_error("invalid format string"), end; + if (*begin == '}') { + handler.on_replacement_field(handler.on_arg_id(), begin); + } else if (*begin == '{') { + handler.on_text(begin, begin + 1); + } else { + auto adapter = id_adapter{handler, 0}; + begin = parse_arg_id(begin, end, adapter); + Char c = begin != end ? *begin : Char(); + if (c == '}') { + handler.on_replacement_field(adapter.arg_id, begin); + } else if (c == ':') { + begin = handler.on_format_specs(adapter.arg_id, begin + 1, end); + if (begin == end || *begin != '}') + return handler.on_error("unknown format specifier"), end; + } else { + return handler.on_error("missing '}' in format string"), end; + } + } + return begin + 1; +} + +template +FMT_CONSTEXPR FMT_INLINE void parse_format_string( + basic_string_view format_str, Handler&& handler) { + // this is most likely a name-lookup defect in msvc's modules implementation + using detail::find; + + auto begin = format_str.data(); + auto end = begin + format_str.size(); + if (end - begin < 32) { + // Use a simple loop instead of memchr for small strings. 
+ const Char* p = begin; + while (p != end) { + auto c = *p++; + if (c == '{') { + handler.on_text(begin, p - 1); + begin = p = parse_replacement_field(p - 1, end, handler); + } else if (c == '}') { + if (p == end || *p != '}') + return handler.on_error("unmatched '}' in format string"); + handler.on_text(begin, p); + begin = ++p; + } + } + handler.on_text(begin, end); + return; + } + struct writer { + FMT_CONSTEXPR void operator()(const Char* pbegin, const Char* pend) { + if (pbegin == pend) return; + for (;;) { + const Char* p = nullptr; + if (!find(pbegin, pend, '}', p)) + return handler_.on_text(pbegin, pend); + ++p; + if (p == pend || *p != '}') + return handler_.on_error("unmatched '}' in format string"); + handler_.on_text(pbegin, p); + pbegin = p + 1; + } + } + Handler& handler_; + } write{handler}; + while (begin != end) { + // Doing two passes with memchr (one for '{' and another for '}') is up to + // 2.5x faster than the naive one-pass implementation on big format strings. + const Char* p = begin; + if (*begin != '{' && !find(begin + 1, end, '{', p)) + return write(begin, end); + write(begin, p); + begin = parse_replacement_field(p, end, handler); + } +} + +template +FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) + -> decltype(ctx.begin()) { + using char_type = typename ParseContext::char_type; + using context = buffer_context; + using mapped_type = conditional_t< + mapped_type_constant::value != type::custom_type, + decltype(arg_mapper().map(std::declval())), T>; + auto f = conditional_t::value, + formatter, + fallback_formatter>(); + return f.parse(ctx); +} + +// A parse context with extra argument id checks. It is only used at compile +// time because adding checks at runtime would introduce substantial overhead +// and would be redundant since argument ids are checked when arguments are +// retrieved anyway. 
+template +class compile_parse_context + : public basic_format_parse_context { + private: + int num_args_; + using base = basic_format_parse_context; + + public: + explicit FMT_CONSTEXPR compile_parse_context( + basic_string_view format_str, + int num_args = (std::numeric_limits::max)(), ErrorHandler eh = {}) + : base(format_str, eh), num_args_(num_args) {} + + FMT_CONSTEXPR auto next_arg_id() -> int { + int id = base::next_arg_id(); + if (id >= num_args_) this->on_error("argument not found"); + return id; + } + + FMT_CONSTEXPR void check_arg_id(int id) { + base::check_arg_id(id); + if (id >= num_args_) this->on_error("argument not found"); + } + using base::check_arg_id; +}; + +template +FMT_CONSTEXPR void check_int_type_spec(char spec, ErrorHandler&& eh) { + switch (spec) { + case 0: + case 'd': + case 'x': + case 'X': + case 'b': + case 'B': + case 'o': + case 'c': + break; + default: + eh.on_error("invalid type specifier"); + break; + } +} + +// Checks char specs and returns true if the type spec is char (and not int). +template +FMT_CONSTEXPR auto check_char_specs(const basic_format_specs& specs, + ErrorHandler&& eh = {}) -> bool { + if (specs.type && specs.type != 'c') { + check_int_type_spec(specs.type, eh); + return false; + } + if (specs.align == align::numeric || specs.sign != sign::none || specs.alt) + eh.on_error("invalid format specifier for char"); + return true; +} + +// A floating-point presentation format. +enum class float_format : unsigned char { + general, // General: exponent notation or fixed point based on magnitude. + exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. + fixed, // Fixed point with the default precision of 6, e.g. 0.0012. 
+ hex +}; + +struct float_specs { + int precision; + float_format format : 8; + sign_t sign : 8; + bool upper : 1; + bool locale : 1; + bool binary32 : 1; + bool use_grisu : 1; + bool showpoint : 1; +}; + +template +FMT_CONSTEXPR auto parse_float_type_spec(const basic_format_specs& specs, + ErrorHandler&& eh = {}) + -> float_specs { + auto result = float_specs(); + result.showpoint = specs.alt; + result.locale = specs.localized; + switch (specs.type) { + case 0: + result.format = float_format::general; + break; + case 'G': + result.upper = true; + FMT_FALLTHROUGH; + case 'g': + result.format = float_format::general; + break; + case 'E': + result.upper = true; + FMT_FALLTHROUGH; + case 'e': + result.format = float_format::exp; + result.showpoint |= specs.precision != 0; + break; + case 'F': + result.upper = true; + FMT_FALLTHROUGH; + case 'f': + result.format = float_format::fixed; + result.showpoint |= specs.precision != 0; + break; + case 'A': + result.upper = true; + FMT_FALLTHROUGH; + case 'a': + result.format = float_format::hex; + break; + default: + eh.on_error("invalid type specifier"); + break; + } + return result; +} + +template +FMT_CONSTEXPR auto check_cstring_type_spec(Char spec, ErrorHandler&& eh = {}) + -> bool { + if (spec == 0 || spec == 's') return true; + if (spec != 'p') eh.on_error("invalid type specifier"); + return false; +} + +template +FMT_CONSTEXPR void check_string_type_spec(Char spec, ErrorHandler&& eh) { + if (spec != 0 && spec != 's') eh.on_error("invalid type specifier"); +} + +template +FMT_CONSTEXPR void check_pointer_type_spec(Char spec, ErrorHandler&& eh) { + if (spec != 0 && spec != 'p') eh.on_error("invalid type specifier"); +} + +// A parse_format_specs handler that checks if specifiers are consistent with +// the argument type. 
+template class specs_checker : public Handler { + private: + detail::type arg_type_; + + FMT_CONSTEXPR void require_numeric_argument() { + if (!is_arithmetic_type(arg_type_)) + this->on_error("format specifier requires numeric argument"); + } + + public: + FMT_CONSTEXPR specs_checker(const Handler& handler, detail::type arg_type) + : Handler(handler), arg_type_(arg_type) {} + + FMT_CONSTEXPR void on_align(align_t align) { + if (align == align::numeric) require_numeric_argument(); + Handler::on_align(align); + } + + FMT_CONSTEXPR void on_sign(sign_t s) { + require_numeric_argument(); + if (is_integral_type(arg_type_) && arg_type_ != type::int_type && + arg_type_ != type::long_long_type && arg_type_ != type::char_type) { + this->on_error("format specifier requires signed argument"); + } + Handler::on_sign(s); + } + + FMT_CONSTEXPR void on_hash() { + require_numeric_argument(); + Handler::on_hash(); + } + + FMT_CONSTEXPR void on_localized() { + require_numeric_argument(); + Handler::on_localized(); + } + + FMT_CONSTEXPR void on_zero() { + require_numeric_argument(); + Handler::on_zero(); + } + + FMT_CONSTEXPR void end_precision() { + if (is_integral_type(arg_type_) || arg_type_ == type::pointer_type) + this->on_error("precision not allowed for this argument type"); + } +}; + +constexpr int invalid_arg_index = -1; + +#if FMT_USE_NONTYPE_TEMPLATE_PARAMETERS +template +constexpr auto get_arg_index_by_name(basic_string_view name) -> int { + if constexpr (detail::is_statically_named_arg()) { + if (name == T::name) return N; + } + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name(name); + (void)name; // Workaround an MSVC bug about "unused" parameter. 
+ return invalid_arg_index; +} #endif -namespace detail { +template +FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view name) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_PARAMETERS + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name<0, Args...>(name); +#endif + (void)name; + return invalid_arg_index; +} -template ::value)> -std::basic_string vformat( - basic_string_view format_str, - basic_format_args>> args); +template +class format_string_checker { + private: + using parse_context_type = compile_parse_context; + enum { num_args = sizeof...(Args) }; -FMT_API std::string vformat(string_view format_str, format_args args); + // Format specifier parsing function. + using parse_func = const Char* (*)(parse_context_type&); + + parse_context_type context_; + parse_func parse_funcs_[num_args > 0 ? num_args : 1]; + + public: + explicit FMT_CONSTEXPR format_string_checker( + basic_string_view format_str, ErrorHandler eh) + : context_(format_str, num_args, eh), + parse_funcs_{&parse_format_specs...} {} + + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + + FMT_CONSTEXPR auto on_arg_id() -> int { return context_.next_arg_id(); } + FMT_CONSTEXPR auto on_arg_id(int id) -> int { + return context_.check_arg_id(id), id; + } + FMT_CONSTEXPR auto on_arg_id(basic_string_view id) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_PARAMETERS + auto index = get_arg_index_by_name(id); + if (index == invalid_arg_index) on_error("named argument is not found"); + return context_.check_arg_id(index), index; +#else + (void)id; + on_error("compile-time checks for named arguments require C++20 support"); + return 0; +#endif + } + + FMT_CONSTEXPR void on_replacement_field(int, const Char*) {} + + FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*) + -> const Char* { + context_.advance_to(context_.begin() + (begin - &*context_.begin())); + // id >= 0 check is a workaround for gcc 10 bug (#2065). + return id >= 0 && id < num_args ? 
parse_funcs_[id](context_) : begin; + } + + FMT_CONSTEXPR void on_error(const char* message) { + context_.on_error(message); + } +}; + +template ::value), int>> +void check_format_string(S format_str) { + FMT_CONSTEXPR auto s = to_string_view(format_str); + using checker = format_string_checker...>; + FMT_CONSTEXPR bool invalid_format = + (parse_format_string(s, checker(s, {})), true); + (void)invalid_format; +} template void vformat_to( - buffer& buf, basic_string_view format_str, + buffer& buf, basic_string_view fmt, basic_format_args)> args, detail::locale_ref loc = {}); -template ::value)> -inline void vprint_mojibake(std::FILE*, basic_string_view, const Args&) {} - FMT_API void vprint_mojibake(std::FILE*, string_view, format_args); #ifndef _WIN32 inline void vprint_mojibake(std::FILE*, string_view, format_args) {} #endif -} // namespace detail +FMT_END_DETAIL_NAMESPACE + +// A formatter specialization for the core types corresponding to detail::type +// constants. +template +struct formatter::value != + detail::type::custom_type>> { + private: + detail::dynamic_format_specs specs_; + + public: + // Parses format specifiers stopping either at the end of the range or at the + // terminating '}'. 
+ template + FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { + auto begin = ctx.begin(), end = ctx.end(); + if (begin == end) return begin; + using handler_type = detail::dynamic_specs_handler; + auto type = detail::type_constant::value; + auto checker = + detail::specs_checker(handler_type(specs_, ctx), type); + auto it = detail::parse_format_specs(begin, end, checker); + auto eh = ctx.error_handler(); + switch (type) { + case detail::type::none_type: + FMT_ASSERT(false, "invalid argument type"); + break; + case detail::type::bool_type: + if (!specs_.type || specs_.type == 's') break; + FMT_FALLTHROUGH; + case detail::type::int_type: + case detail::type::uint_type: + case detail::type::long_long_type: + case detail::type::ulong_long_type: + case detail::type::int128_type: + case detail::type::uint128_type: + detail::check_int_type_spec(specs_.type, eh); + break; + case detail::type::char_type: + detail::check_char_specs(specs_, eh); + break; + case detail::type::float_type: + if (detail::const_check(FMT_USE_FLOAT)) + detail::parse_float_type_spec(specs_, eh); + else + FMT_ASSERT(false, "float support disabled"); + break; + case detail::type::double_type: + if (detail::const_check(FMT_USE_DOUBLE)) + detail::parse_float_type_spec(specs_, eh); + else + FMT_ASSERT(false, "double support disabled"); + break; + case detail::type::long_double_type: + if (detail::const_check(FMT_USE_LONG_DOUBLE)) + detail::parse_float_type_spec(specs_, eh); + else + FMT_ASSERT(false, "long double support disabled"); + break; + case detail::type::cstring_type: + detail::check_cstring_type_spec(specs_.type, eh); + break; + case detail::type::string_type: + detail::check_string_type_spec(specs_.type, eh); + break; + case detail::type::pointer_type: + detail::check_pointer_type_spec(specs_.type, eh); + break; + case detail::type::custom_type: + // Custom format specifiers are checked in parse functions of + // formatter specializations. 
+ break; + } + return it; + } + + template + FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const + -> decltype(ctx.out()); +}; + +template struct basic_runtime { basic_string_view str; }; + +template class basic_format_string { + private: + basic_string_view str_; + + public: + template >::value)> + FMT_CONSTEVAL basic_format_string(const S& s) : str_(s) { + static_assert( + detail::count< + (std::is_base_of>::value && + std::is_reference::value)...>() == 0, + "passing views as lvalues is disallowed"); +#ifdef FMT_HAS_CONSTEVAL + if constexpr (detail::count_named_args() == 0) { + using checker = detail::format_string_checker...>; + detail::parse_format_string(str_, checker(s, {})); + } +#else + detail::check_format_string(s); +#endif + } + basic_format_string(basic_runtime r) : str_(r.str) {} + + FMT_INLINE operator basic_string_view() const { return str_; } +}; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +// Workaround broken conversion on older gcc. +template using format_string = string_view; +template auto runtime(const S& s) -> basic_string_view> { + return s; +} +#else +template +using format_string = basic_format_string...>; +// Creates a runtime format string. +template auto runtime(const S& s) -> basic_runtime> { + return {{s}}; +} +#endif + +FMT_API auto vformat(string_view fmt, format_args args) -> std::string; + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and returns the result + as a string. + + **Example**:: + + #include + std::string message = fmt::format("The answer is {}", 42); + \endrst +*/ +template +FMT_INLINE auto format(format_string fmt, T&&... args) -> std::string { + return vformat(fmt, fmt::make_format_args(args...)); +} /** Formats a string and writes the output to ``out``. */ -// GCC 8 and earlier cannot handle std::back_insert_iterator with -// vformat_to(...) overload, so SFINAE on iterator type instead. 
-template , - bool enable = detail::is_output_iterator::value> -auto vformat_to(OutputIt out, const S& format_str, - basic_format_args>> args) - -> typename std::enable_if::type { - decltype(detail::get_buffer(out)) buf(detail::get_buffer_init(out)); - detail::vformat_to(buf, to_string_view(format_str), args); +template ::value)> +auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { + using detail::get_buffer; + auto&& buf = get_buffer(out); + detail::vformat_to(buf, string_view(fmt), args); return detail::get_iterator(buf); } /** \rst - Formats arguments, writes the result to the output iterator ``out`` and returns - the iterator past the end of the output range. + Formats ``args`` according to specifications in ``fmt``, writes the result to + the output iterator ``out`` and returns the iterator past the end of the output + range. **Example**:: - std::vector out; + auto out = std::vector(); fmt::format_to(std::back_inserter(out), "{}", 42); \endrst */ -// We cannot use FMT_ENABLE_IF because of a bug in gcc 8.3. -template >::value> -inline auto format_to(OutputIt out, const S& format_str, Args&&... args) -> - typename std::enable_if::type { - const auto& vargs = fmt::make_args_checked(format_str, args...); - return vformat_to(out, to_string_view(format_str), vargs); +template ::value)> +FMT_INLINE auto format_to(OutputIt out, format_string fmt, T&&... 
args) + -> OutputIt { + return vformat_to(out, fmt, fmt::make_format_args(args...)); } template struct format_to_n_result { @@ -2020,111 +2926,81 @@ template struct format_to_n_result { size_t size; }; -template ::value)> -inline format_to_n_result vformat_to_n( - OutputIt out, size_t n, basic_string_view format_str, - basic_format_args>> args) { - detail::iterator_buffer buf(out, - n); - detail::vformat_to(buf, format_str, args); +template ::value)> +auto vformat_to_n(OutputIt out, size_t n, string_view fmt, format_args args) + -> format_to_n_result { + using buffer = + detail::iterator_buffer; + auto buf = buffer(out, n); + detail::vformat_to(buf, fmt, args); return {buf.out(), buf.count()}; } /** - \rst - Formats arguments, writes up to ``n`` characters of the result to the output - iterator ``out`` and returns the total output size and the iterator past the - end of the output range. - \endrst + \rst + Formats ``args`` according to specifications in ``fmt``, writes up to ``n`` + characters of the result to the output iterator ``out`` and returns the total + (not truncated) output size and the iterator past the end of the output range. + \endrst */ -template >::value> -inline auto format_to_n(OutputIt out, size_t n, const S& format_str, - const Args&... args) -> - typename std::enable_if>::type { - const auto& vargs = fmt::make_args_checked(format_str, args...); - return vformat_to_n(out, n, to_string_view(format_str), vargs); +template ::value)> +FMT_INLINE auto format_to_n(OutputIt out, size_t n, format_string fmt, + const T&... args) -> format_to_n_result { + return vformat_to_n(out, n, fmt, fmt::make_format_args(args...)); } -/** - Returns the number of characters in the output of - ``format(format_str, args...)``. - */ -template -inline size_t formatted_size(string_view format_str, Args&&... 
args) { - const auto& vargs = fmt::make_args_checked(format_str, args...); - detail::counting_buffer<> buf; - detail::vformat_to(buf, format_str, vargs); +/** Returns the number of chars in the output of ``format(fmt, args...)``. */ +template +FMT_INLINE auto formatted_size(format_string fmt, T&&... args) -> size_t { + auto buf = detail::counting_buffer<>(); + detail::vformat_to(buf, string_view(fmt), fmt::make_format_args(args...)); return buf.count(); } -template > -FMT_INLINE std::basic_string vformat( - const S& format_str, - basic_format_args>> args) { - return detail::vformat(to_string_view(format_str), args); -} +FMT_API void vprint(string_view fmt, format_args args); +FMT_API void vprint(std::FILE* f, string_view fmt, format_args args); /** \rst - Formats arguments and returns the result as a string. - - **Example**:: - - #include - std::string message = fmt::format("The answer is {}", 42); - \endrst -*/ -// Pass char_t as a default template parameter instead of using -// std::basic_string> to reduce the symbol size. -template > -FMT_INLINE std::basic_string format(const S& format_str, Args&&... args) { - const auto& vargs = fmt::make_args_checked(format_str, args...); - return detail::vformat(to_string_view(format_str), vargs); -} - -FMT_API void vprint(string_view, format_args); -FMT_API void vprint(std::FILE*, string_view, format_args); - -/** - \rst - Formats ``args`` according to specifications in ``format_str`` and writes the - output to the file ``f``. Strings are assumed to be Unicode-encoded unless the - ``FMT_UNICODE`` macro is set to 0. - - **Example**:: - - fmt::print(stderr, "Don't {}!", "panic"); - \endrst - */ -template > -inline void print(std::FILE* f, const S& format_str, Args&&... args) { - const auto& vargs = fmt::make_args_checked(format_str, args...); - return detail::is_unicode() - ? 
vprint(f, to_string_view(format_str), vargs) - : detail::vprint_mojibake(f, to_string_view(format_str), vargs); -} - -/** - \rst - Formats ``args`` according to specifications in ``format_str`` and writes - the output to ``stdout``. Strings are assumed to be Unicode-encoded unless - the ``FMT_UNICODE`` macro is set to 0. + Formats ``args`` according to specifications in ``fmt`` and writes the output + to ``stdout``. **Example**:: fmt::print("Elapsed time: {0:.2f} seconds", 1.23); \endrst */ -template > -inline void print(const S& format_str, Args&&... args) { - const auto& vargs = fmt::make_args_checked(format_str, args...); - return detail::is_unicode() - ? vprint(to_string_view(format_str), vargs) - : detail::vprint_mojibake(stdout, to_string_view(format_str), - vargs); +template +FMT_INLINE void print(format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::is_utf8() ? vprint(fmt, vargs) + : detail::vprint_mojibake(stdout, fmt, vargs); } + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and writes the + output to the file ``f``. + + **Example**:: + + fmt::print(stderr, "Don't {}!", "panic"); + \endrst + */ +template +FMT_INLINE void print(std::FILE* f, format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::is_utf8() ? 
vprint(f, fmt, vargs) + : detail::vprint_mojibake(f, fmt, vargs); +} + +FMT_MODULE_EXPORT_END +FMT_GCC_PRAGMA("GCC pop_options") FMT_END_NAMESPACE +#ifdef FMT_HEADER_ONLY +# include "format.h" +#endif #endif // FMT_CORE_H_ diff --git a/src/fmt/format-inl.h b/src/fmt/format-inl.h index 8f2fe7354a..a802aea5e1 100644 --- a/src/fmt/format-inl.h +++ b/src/fmt/format-inl.h @@ -8,8 +8,9 @@ #ifndef FMT_FORMAT_INL_H_ #define FMT_FORMAT_INL_H_ -#include +#include #include +#include // errno #include #include #include @@ -27,11 +28,6 @@ #include "format.h" -// Dummy implementations of strerror_r and strerror_s called if corresponding -// system functions are not available. -inline fmt::detail::null<> strerror_r(int, char*, ...) { return {}; } -inline fmt::detail::null<> strerror_s(char*, size_t, ...) { return {}; } - FMT_BEGIN_NAMESPACE namespace detail { @@ -57,76 +53,6 @@ inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) { # define FMT_SNPRINTF fmt_snprintf #endif // _MSC_VER -// A portable thread-safe version of strerror. -// Sets buffer to point to a string describing the error code. -// This can be either a pointer to a string stored in buffer, -// or a pointer to some static immutable string. -// Returns one of the following values: -// 0 - success -// ERANGE - buffer is not large enough to store the error message -// other - failure -// Buffer should be at least of size 1. -inline int safe_strerror(int error_code, char*& buffer, - size_t buffer_size) FMT_NOEXCEPT { - FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer"); - - class dispatcher { - private: - int error_code_; - char*& buffer_; - size_t buffer_size_; - - // A noop assignment operator to avoid bogus warnings. - void operator=(const dispatcher&) {} - - // Handle the result of XSI-compliant version of strerror_r. - int handle(int result) { - // glibc versions before 2.13 return result in errno. - return result == -1 ? 
errno : result; - } - - // Handle the result of GNU-specific version of strerror_r. - FMT_MAYBE_UNUSED - int handle(char* message) { - // If the buffer is full then the message is probably truncated. - if (message == buffer_ && strlen(buffer_) == buffer_size_ - 1) - return ERANGE; - buffer_ = message; - return 0; - } - - // Handle the case when strerror_r is not available. - FMT_MAYBE_UNUSED - int handle(detail::null<>) { - return fallback(strerror_s(buffer_, buffer_size_, error_code_)); - } - - // Fallback to strerror_s when strerror_r is not available. - FMT_MAYBE_UNUSED - int fallback(int result) { - // If the buffer is full then the message is probably truncated. - return result == 0 && strlen(buffer_) == buffer_size_ - 1 ? ERANGE - : result; - } - -#if !FMT_MSC_VER - // Fallback to strerror if strerror_r and strerror_s are not available. - int fallback(detail::null<>) { - errno = 0; - buffer_ = strerror(error_code_); - return errno; - } -#endif - - public: - dispatcher(int err_code, char*& buf, size_t buf_size) - : error_code_(err_code), buffer_(buf), buffer_size_(buf_size) {} - - int run() { return handle(strerror_r(error_code_, buffer_, buffer_size_)); } - }; - return dispatcher(error_code, buffer, buffer_size).run(); -} - FMT_FUNC void format_error_code(detail::buffer& out, int error_code, string_view message) FMT_NOEXCEPT { // Report error code making sure that the output fits into @@ -145,18 +71,18 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); auto it = buffer_appender(out); if (message.size() <= inline_buffer_size - error_code_size) - format_to(it, "{}{}", message, SEP); - format_to(it, "{}{}", ERROR_STR, error_code); - assert(out.size() <= inline_buffer_size); + format_to(it, FMT_STRING("{}{}"), message, SEP); + format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); + FMT_ASSERT(out.size() <= inline_buffer_size, ""); } FMT_FUNC void 
report_error(format_func func, int error_code, - string_view message) FMT_NOEXCEPT { + const char* message) FMT_NOEXCEPT { memory_buffer full_message; func(full_message, error_code, message); // Don't use fwrite_fully because the latter may throw. - (void)std::fwrite(full_message.data(), full_message.size(), 1, stderr); - std::fputc('\n', stderr); + if (std::fwrite(full_message.data(), full_message.size(), 1, stderr) > 0) + std::fputc('\n', stderr); } // A wrapper around fwrite that throws on error. @@ -165,11 +91,8 @@ inline void fwrite_fully(const void* ptr, size_t size, size_t count, size_t written = std::fwrite(ptr, size, count, stream); if (written < count) FMT_THROW(system_error(errno, "cannot write to file")); } -} // namespace detail - -#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) -namespace detail { +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR template locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { static_assert(std::is_same::value, ""); @@ -180,41 +103,36 @@ template Locale locale_ref::get() const { return locale_ ? *static_cast(locale_) : std::locale(); } -template FMT_FUNC std::string grouping_impl(locale_ref loc) { - return std::use_facet>(loc.get()).grouping(); -} -template FMT_FUNC Char thousands_sep_impl(locale_ref loc) { - return std::use_facet>(loc.get()) - .thousands_sep(); +template +FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { + auto& facet = std::use_facet>(loc.get()); + auto grouping = facet.grouping(); + auto thousands_sep = grouping.empty() ? 
Char() : facet.thousands_sep(); + return {std::move(grouping), thousands_sep}; } template FMT_FUNC Char decimal_point_impl(locale_ref loc) { return std::use_facet>(loc.get()) .decimal_point(); } -} // namespace detail #else template -FMT_FUNC std::string detail::grouping_impl(locale_ref) { - return "\03"; +FMT_FUNC auto thousands_sep_impl(locale_ref) -> thousands_sep_result { + return {"\03", FMT_STATIC_THOUSANDS_SEPARATOR}; } -template FMT_FUNC Char detail::thousands_sep_impl(locale_ref) { - return FMT_STATIC_THOUSANDS_SEPARATOR; -} -template FMT_FUNC Char detail::decimal_point_impl(locale_ref) { +template FMT_FUNC Char decimal_point_impl(locale_ref) { return '.'; } #endif +} // namespace detail +#if !FMT_MSC_VER FMT_API FMT_FUNC format_error::~format_error() FMT_NOEXCEPT = default; -FMT_API FMT_FUNC system_error::~system_error() FMT_NOEXCEPT = default; +#endif -FMT_FUNC void system_error::init(int err_code, string_view format_str, - format_args args) { - error_code_ = err_code; - memory_buffer buffer; - format_system_error(buffer, err_code, vformat(format_str, args)); - std::runtime_error& base = *this; - base = std::runtime_error(to_string(buffer)); +FMT_FUNC std::system_error vsystem_error(int error_code, string_view format_str, + format_args args) { + auto ec = std::error_code(error_code, std::generic_category()); + return std::system_error(ec, vformat(format_str, args)); } namespace detail { @@ -227,853 +145,15 @@ template <> FMT_FUNC int count_digits<4>(detail::fallback_uintptr n) { return i >= 0 ? 
i * char_digits + count_digits<4, unsigned>(n.value[i]) : 1; } +#if __cplusplus < 201703L +template constexpr const char basic_data::digits[][2]; +template constexpr const char basic_data::hex_digits[]; +template constexpr const char basic_data::signs[]; +template constexpr const unsigned basic_data::prefixes[]; +template constexpr const char basic_data::left_padding_shifts[]; template -const typename basic_data::digit_pair basic_data::digits[] = { - {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, - {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, - {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, - {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, - {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, - {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, - {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, - {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, - {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, - {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, - {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, - {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, - {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, - {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, - {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, - {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, - {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; - -template -const char basic_data::hex_digits[] = "0123456789abcdef"; - -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 - -template -const uint64_t 
basic_data::powers_of_10_64[] = { - 1, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; - -template -const uint32_t basic_data::zero_or_powers_of_10_32[] = {0, - FMT_POWERS_OF_10(1)}; -template -const uint64_t basic_data::zero_or_powers_of_10_64[] = { - 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; - -template -const uint32_t basic_data::zero_or_powers_of_10_32_new[] = { - 0, 0, FMT_POWERS_OF_10(1)}; - -template -const uint64_t basic_data::zero_or_powers_of_10_64_new[] = { - 0, 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; - -// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. -// These are generated by support/compute-powers.py. -template -const uint64_t basic_data::grisu_pow10_significands[] = { - 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, - 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, - 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, - 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, - 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, - 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, - 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, - 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, - 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, - 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, - 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, - 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, - 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, - 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, - 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, - 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, - 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, - 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, - 0x83c7088e1aab65db, 
0xc45d1df942711d9a, 0x924d692ca61be758, - 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, - 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, - 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, - 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, - 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, - 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, - 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, - 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, - 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, - 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, -}; - -// Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding -// to significands above. -template -const int16_t basic_data::grisu_pow10_exponents[] = { - -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, - -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, - -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, - -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, - -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, - 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, - 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, - 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; - -template -const divtest_table_entry basic_data::divtest_table_for_pow5_32[] = - {{0x00000001, 0xffffffff}, {0xcccccccd, 0x33333333}, - {0xc28f5c29, 0x0a3d70a3}, {0x26e978d5, 0x020c49ba}, - {0x3afb7e91, 0x0068db8b}, {0x0bcbe61d, 0x0014f8b5}, - {0x68c26139, 0x000431bd}, {0xae8d46a5, 0x0000d6bf}, - {0x22e90e21, 0x00002af3}, {0x3a2e9c6d, 0x00000897}, - {0x3ed61f49, 0x000001b7}}; - -template -const divtest_table_entry basic_data::divtest_table_for_pow5_64[] = - {{0x0000000000000001, 0xffffffffffffffff}, - {0xcccccccccccccccd, 0x3333333333333333}, - {0x8f5c28f5c28f5c29, 0x0a3d70a3d70a3d70}, - {0x1cac083126e978d5, 0x020c49ba5e353f7c}, - 
{0xd288ce703afb7e91, 0x0068db8bac710cb2}, - {0x5d4e8fb00bcbe61d, 0x0014f8b588e368f0}, - {0x790fb65668c26139, 0x000431bde82d7b63}, - {0xe5032477ae8d46a5, 0x0000d6bf94d5e57a}, - {0xc767074b22e90e21, 0x00002af31dc46118}, - {0x8e47ce423a2e9c6d, 0x0000089705f4136b}, - {0x4fa7f60d3ed61f49, 0x000001b7cdfd9d7b}, - {0x0fee64690c913975, 0x00000057f5ff85e5}, - {0x3662e0e1cf503eb1, 0x000000119799812d}, - {0xa47a2cf9f6433fbd, 0x0000000384b84d09}, - {0x54186f653140a659, 0x00000000b424dc35}, - {0x7738164770402145, 0x0000000024075f3d}, - {0xe4a4d1417cd9a041, 0x000000000734aca5}, - {0xc75429d9e5c5200d, 0x000000000170ef54}, - {0xc1773b91fac10669, 0x000000000049c977}, - {0x26b172506559ce15, 0x00000000000ec1e4}, - {0xd489e3a9addec2d1, 0x000000000002f394}, - {0x90e860bb892c8d5d, 0x000000000000971d}, - {0x502e79bf1b6f4f79, 0x0000000000001e39}, - {0xdcd618596be30fe5, 0x000000000000060b}}; - -template -const uint64_t basic_data::dragonbox_pow10_significands_64[] = { - 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, - 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, - 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, - 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, - 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, - 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810, - 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, - 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd, - 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, - 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, - 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000, - 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, - 0xc350000000000000, 0xf424000000000000, 0x9896800000000000, - 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, - 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000, - 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000, - 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 
0x8ac7230489e80000, - 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000, - 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, - 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940984, - 0xa18f07d736b90be5, 0xc9f2c9cd04674ede, 0xfc6f7c4045812296, - 0x9dc5ada82b70b59d, 0xc5371912364ce305, 0xf684df56c3e01bc6, - 0x9a130b963a6c115c, 0xc097ce7bc90715b3, 0xf0bdc21abb48db20, - 0x96769950b50d88f4, 0xbc143fa4e250eb31, 0xeb194f8e1ae525fd, - 0x92efd1b8d0cf37be, 0xb7abc627050305ad, 0xe596b7b0c643c719, - 0x8f7e32ce7bea5c6f, 0xb35dbf821ae4f38b, 0xe0352f62a19e306e}; - -template -const uint128_wrapper basic_data::dragonbox_pow10_significands_128[] = { -#if FMT_USE_FULL_CACHE_DRAGONBOX - {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, - {0x9faacf3df73609b1, 0x77b191618c54e9ad}, - {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, - {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, - {0x9becce62836ac577, 0x4ee367f9430aec33}, - {0xc2e801fb244576d5, 0x229c41f793cda740}, - {0xf3a20279ed56d48a, 0x6b43527578c11110}, - {0x9845418c345644d6, 0x830a13896b78aaaa}, - {0xbe5691ef416bd60c, 0x23cc986bc656d554}, - {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, - {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, - {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, - {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, - {0x91376c36d99995be, 0x23100809b9c21fa2}, - {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, - {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, - {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, - {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, - {0xdd95317f31c7fa1d, 0x40405643d711d584}, - {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, - {0xad1c8eab5ee43b66, 0xda3243650005eed0}, - {0xd863b256369d4a40, 0x90bed43e40076a83}, - {0x873e4f75e2224e68, 0x5a7744a6e804a292}, - {0xa90de3535aaae202, 0x711515d0a205cb37}, - {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, - {0x8412d9991ed58091, 0xe858790afe9486c3}, - {0xa5178fff668ae0b6, 0x626e974dbe39a873}, - {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, - {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, - {0xa139029f6a239f72, 
0x1c1fffc1ebc44e81}, - {0xc987434744ac874e, 0xa327ffb266b56221}, - {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, - {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, - {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, - {0xf6019da07f549b2b, 0x7e2a53a146606a49}, - {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, - {0xc0314325637a1939, 0xfa911155fefb5309}, - {0xf03d93eebc589f88, 0x793555ab7eba27cb}, - {0x96267c7535b763b5, 0x4bc1558b2f3458df}, - {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, - {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, - {0x92a1958a7675175f, 0x0bfacd89ec191eca}, - {0xb749faed14125d36, 0xcef980ec671f667c}, - {0xe51c79a85916f484, 0x82b7e12780e7401b}, - {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, - {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, - {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, - {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1}, - {0xaecc49914078536d, 0x58fae9f773886e19}, - {0xda7f5bf590966848, 0xaf39a475506a899f}, - {0x888f99797a5e012d, 0x6d8406c952429604}, - {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, - {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, - {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, - {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, - {0xd0601d8efc57b08b, 0xf13b94daf124da27}, - {0x823c12795db6ce57, 0x76c53d08d6b70859}, - {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, - {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, - {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, - {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, - {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, - {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, - {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, - {0xc21094364dfb5636, 0x985915fc12f542e5}, - {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, - {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, - {0xbd8430bd08277231, 0x50c6ff782a838354}, - {0xece53cec4a314ebd, 0xa4f8bf5635246429}, - {0x940f4613ae5ed136, 0x871b7795e136be9a}, - {0xb913179899f68584, 0x28e2557b59846e40}, - {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, - {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, - {0xb4bca50b065abe63, 0x0fed077a756b53aa}, - {0xe1ebce4dc7f16dfb, 
0xd3e8495912c62895}, - {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, - {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, - {0xdca04777f541c567, 0xecf0d7a0fc5583a1}, - {0x89e42caaf9491b60, 0xf41686c49db57245}, - {0xac5d37d5b79b6239, 0x311c2875c522ced6}, - {0xd77485cb25823ac7, 0x7d633293366b828c}, - {0x86a8d39ef77164bc, 0xae5dff9c02033198}, - {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, - {0xd267caa862a12d66, 0xd072df63c324fd7c}, - {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, - {0xa46116538d0deb78, 0x52d9be85f074e609}, - {0xcd795be870516656, 0x67902e276c921f8c}, - {0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, - {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, - {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, - {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, - {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, - {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, - {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, - {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, - {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, - {0xef340a98172aace4, 0x86fb897116c87c35}, - {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, - {0xbae0a846d2195712, 0x8974836059cca10a}, - {0xe998d258869facd7, 0x2bd1a438703fc94c}, - {0x91ff83775423cc06, 0x7b6306a34627ddd0}, - {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, - {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, - {0x8e938662882af53e, 0x547eb47b7282ee9d}, - {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, - {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, - {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, - {0xae0b158b4738705e, 0x9624ab50b148d446}, - {0xd98ddaee19068c76, 0x3badd624dd9b0958}, - {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, - {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, - {0xd47487cc8470652b, 0x7647c32000696720}, - {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, - {0xa5fb0a17c777cf09, 0xf468107100525891}, - {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, - {0x81ac1fe293d599bf, 0xc6f14cd848405531}, - {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, - {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, - {0xfd442e4688bd304a, 0x908f4a166d1da664}, - {0x9e4a9cec15763e2e, 
0x9a598e4e043287ff}, - {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, - {0xf7549530e188c128, 0xd12bee59e68ef47d}, - {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, - {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, - {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, - {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, - {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, - {0xebdf661791d60f56, 0x111b495b3464ad22}, - {0x936b9fcebb25c995, 0xcab10dd900beec35}, - {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, - {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, - {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, - {0xb3f4e093db73a093, 0x59ed216765690f57}, - {0xe0f218b8d25088b8, 0x306869c13ec3532d}, - {0x8c974f7383725573, 0x1e414218c73a13fc}, - {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, - {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, - {0x894bc396ce5da772, 0x6b8bba8c328eb784}, - {0xab9eb47c81f5114f, 0x066ea92f3f326565}, - {0xd686619ba27255a2, 0xc80a537b0efefebe}, - {0x8613fd0145877585, 0xbd06742ce95f5f37}, - {0xa798fc4196e952e7, 0x2c48113823b73705}, - {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, - {0x82ef85133de648c4, 0x9a984d73dbe722fc}, - {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, - {0xcc963fee10b7d1b3, 0x318df905079926a9}, - {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, - {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, - {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, - {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, - {0x9c1661a651213e2d, 0x06bea10ca65c084f}, - {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, - {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, - {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, - {0xbe89523386091465, 0xf6bbb397f1135824}, - {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, - {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, - {0xba121a4650e4ddeb, 0x92f34d62616ce414}, - {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, - {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, - {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, - {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, - {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, - {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, - {0xddd0467c64bce4a0, 
0xac7cb3f6d05ddbdf}, - {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, - {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, - {0xd89d64d57a607744, 0xe871c7bf077ba8b8}, - {0x87625f056c7c4a8b, 0x11471cd764ad4973}, - {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, - {0xd389b47879823479, 0x4aff1d108d4ec2c4}, - {0x843610cb4bf160cb, 0xcedf722a585139bb}, - {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, - {0xce947a3da6a9273e, 0x733d226229feea33}, - {0x811ccc668829b887, 0x0806357d5a3f5260}, - {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, - {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, - {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, - {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, - {0xc5029163f384a931, 0x0a9e795e65d4df12}, - {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, - {0x99ea0196163fa42e, 0x504bced1bf8e4e46}, - {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, - {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, - {0x964e858c91ba2655, 0x3a6a07f8d510f870}, - {0xbbe226efb628afea, 0x890489f70a55368c}, - {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, - {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, - {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, - {0xe55990879ddcaabd, 0xcc420a6a101d0516}, - {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, - {0xb32df8e9f3546564, 0x47939822dc96abfa}, - {0xdff9772470297ebd, 0x59787e2b93bc56f8}, - {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, - {0xaefae51477a06b03, 0xede622920b6b23f2}, - {0xdab99e59958885c4, 0xe95fab368e45ecee}, - {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, - {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, - {0xd59944a37c0752a2, 0x4be76d3346f04960}, - {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, - {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, - {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, - {0x825ecc24c873782f, 0x8ed400668c0c28c9}, - {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, - {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, - {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, - {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, - {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, - {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, - {0x9b69dbe1b548ce7c, 
0xc986afbe3ee11abb}, - {0xc24452da229b021b, 0xfbe85badce996169}, - {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, - {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, - {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, - {0xed246723473e3813, 0x290123e9aab23b69}, - {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, - {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, - {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, - {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, - {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, - {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, - {0x8d590723948a535f, 0x579c487e5a38ad0f}, - {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, - {0xdcdb1b2798182244, 0xf8e431456cf88e66}, - {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, - {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, - {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, - {0x86ccbb52ea94baea, 0x98e947129fc2b4ea}, - {0xa87fea27a539e9a5, 0x3f2398d747b36225}, - {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, - {0x83a3eeeef9153e89, 0x1953cf68300424ad}, - {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, - {0xcdb02555653131b6, 0x3792f412cb06794e}, - {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, - {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, - {0xc8de047564d20a8b, 0xf245825a5a445276}, - {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, - {0x9ced737bb6c4183d, 0x55464dd69685606c}, - {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, - {0xf53304714d9265df, 0xd53dd99f4b3066a9}, - {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, - {0xbf8fdb78849a5f96, 0xde98520472bdd034}, - {0xef73d256a5c0f77c, 0x963e66858f6d4441}, - {0x95a8637627989aad, 0xdde7001379a44aa9}, - {0xbb127c53b17ec159, 0x5560c018580d5d53}, - {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, - {0x9226712162ab070d, 0xcab3961304ca70e9}, - {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, - {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, - {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, - {0xb267ed1940f1c61c, 0x55f038b237591ed4}, - {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, - {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, - {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, - {0xd9c7dced53c72255, 
0x96e7bd358c904a22}, - {0x881cea14545c7575, 0x7e50d64177da2e55}, - {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, - {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865}, - {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, - {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, - {0xcfb11ead453994ba, 0x67de18eda5814af3}, - {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, - {0xa2425ff75e14fc31, 0xa1258379a94d028e}, - {0xcad2f7f5359a3b3e, 0x096ee45813a04331}, - {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, - {0x9e74d1b791e07e48, 0x775ea264cf55347e}, - {0xc612062576589dda, 0x95364afe032a819e}, - {0xf79687aed3eec551, 0x3a83ddbd83f52205}, - {0x9abe14cd44753b52, 0xc4926a9672793543}, - {0xc16d9a0095928a27, 0x75b7053c0f178294}, - {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, - {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, - {0xbce5086492111aea, 0x88f4bb1ca6bcf585}, - {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, - {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, - {0xb877aa3236a4b449, 0x09befeb9fad487c3}, - {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, - {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, - {0xb424dc35095cd80f, 0x538484c19ef38c95}, - {0xe12e13424bb40e13, 0x2865a5f206b06fba}, - {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, - {0xafebff0bcb24aafe, 0xf78f69a51539d749}, - {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, - {0x89705f4136b4a597, 0x31680a88f8953031}, - {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, - {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, - {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, - {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, - {0xd1b71758e219652b, 0xd3c36113404ea4a9}, - {0x83126e978d4fdf3b, 0x645a1cac083126ea}, - {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, - {0xcccccccccccccccc, 0xcccccccccccccccd}, - {0x8000000000000000, 0x0000000000000000}, - {0xa000000000000000, 0x0000000000000000}, - {0xc800000000000000, 0x0000000000000000}, - {0xfa00000000000000, 0x0000000000000000}, - {0x9c40000000000000, 0x0000000000000000}, - {0xc350000000000000, 0x0000000000000000}, - {0xf424000000000000, 0x0000000000000000}, - {0x9896800000000000, 
0x0000000000000000}, - {0xbebc200000000000, 0x0000000000000000}, - {0xee6b280000000000, 0x0000000000000000}, - {0x9502f90000000000, 0x0000000000000000}, - {0xba43b74000000000, 0x0000000000000000}, - {0xe8d4a51000000000, 0x0000000000000000}, - {0x9184e72a00000000, 0x0000000000000000}, - {0xb5e620f480000000, 0x0000000000000000}, - {0xe35fa931a0000000, 0x0000000000000000}, - {0x8e1bc9bf04000000, 0x0000000000000000}, - {0xb1a2bc2ec5000000, 0x0000000000000000}, - {0xde0b6b3a76400000, 0x0000000000000000}, - {0x8ac7230489e80000, 0x0000000000000000}, - {0xad78ebc5ac620000, 0x0000000000000000}, - {0xd8d726b7177a8000, 0x0000000000000000}, - {0x878678326eac9000, 0x0000000000000000}, - {0xa968163f0a57b400, 0x0000000000000000}, - {0xd3c21bcecceda100, 0x0000000000000000}, - {0x84595161401484a0, 0x0000000000000000}, - {0xa56fa5b99019a5c8, 0x0000000000000000}, - {0xcecb8f27f4200f3a, 0x0000000000000000}, - {0x813f3978f8940984, 0x4000000000000000}, - {0xa18f07d736b90be5, 0x5000000000000000}, - {0xc9f2c9cd04674ede, 0xa400000000000000}, - {0xfc6f7c4045812296, 0x4d00000000000000}, - {0x9dc5ada82b70b59d, 0xf020000000000000}, - {0xc5371912364ce305, 0x6c28000000000000}, - {0xf684df56c3e01bc6, 0xc732000000000000}, - {0x9a130b963a6c115c, 0x3c7f400000000000}, - {0xc097ce7bc90715b3, 0x4b9f100000000000}, - {0xf0bdc21abb48db20, 0x1e86d40000000000}, - {0x96769950b50d88f4, 0x1314448000000000}, - {0xbc143fa4e250eb31, 0x17d955a000000000}, - {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, - {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, - {0xb7abc627050305ad, 0xf14a3d9e40000000}, - {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, - {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, - {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, - {0xe0352f62a19e306e, 0xd50b2037ad200000}, - {0x8c213d9da502de45, 0x4526f422cc340000}, - {0xaf298d050e4395d6, 0x9670b12b7f410000}, - {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, - {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, - {0xab0e93b6efee0053, 0x8eea0d047a457a00}, - {0xd5d238a4abe98068, 
0x72a4904598d6d880}, - {0x85a36366eb71f041, 0x47a6da2b7f864750}, - {0xa70c3c40a64e6c51, 0x999090b65f67d924}, - {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d}, - {0x82818f1281ed449f, 0xbff8f10e7a8921a4}, - {0xa321f2d7226895c7, 0xaff72d52192b6a0d}, - {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490}, - {0xfee50b7025c36a08, 0x02f236d04753d5b4}, - {0x9f4f2726179a2245, 0x01d762422c946590}, - {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5}, - {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2}, - {0x9b934c3b330c8577, 0x63cc55f49f88eb2f}, - {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb}, - {0xf316271c7fc3908a, 0x8bef464e3945ef7a}, - {0x97edd871cfda3a56, 0x97758bf0e3cbb5ac}, - {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317}, - {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd}, - {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a}, - {0xb975d6b6ee39e436, 0xb3e2fd538e122b44}, - {0xe7d34c64a9c85d44, 0x60dbbca87196b616}, - {0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd}, - {0xb51d13aea4a488dd, 0x6babab6398bdbe41}, - {0xe264589a4dcdab14, 0xc696963c7eed2dd1}, - {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2}, - {0xb0de65388cc8ada8, 0x3b25a55f43294bcb}, - {0xdd15fe86affad912, 0x49ef0eb713f39ebe}, - {0x8a2dbf142dfcc7ab, 0x6e3569326c784337}, - {0xacb92ed9397bf996, 0x49c2c37f07965404}, - {0xd7e77a8f87daf7fb, 0xdc33745ec97be906}, - {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3}, - {0xa8acd7c0222311bc, 0xc40832ea0d68ce0c}, - {0xd2d80db02aabd62b, 0xf50a3fa490c30190}, - {0x83c7088e1aab65db, 0x792667c6da79e0fa}, - {0xa4b8cab1a1563f52, 0x577001b891185938}, - {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, - {0x80b05e5ac60b6178, 0x544f8158315b05b4}, - {0xa0dc75f1778e39d6, 0x696361ae3db1c721}, - {0xc913936dd571c84c, 0x03bc3a19cd1e38e9}, - {0xfb5878494ace3a5f, 0x04ab48a04065c723}, - {0x9d174b2dcec0e47b, 0x62eb0d64283f9c76}, - {0xc45d1df942711d9a, 0x3ba5d0bd324f8394}, - {0xf5746577930d6500, 0xca8f44ec7ee36479}, - {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb}, - {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e}, - {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e}, - {0x95d04aee3b80ece5, 
0xbba1f1d158724a12}, - {0xbb445da9ca61281f, 0x2a8a6e45ae8edc97}, - {0xea1575143cf97226, 0xf52d09d71a3293bd}, - {0x924d692ca61be758, 0x593c2626705f9c56}, - {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c}, - {0xe498f455c38b997a, 0x0b6dfb9c0f956447}, - {0x8edf98b59a373fec, 0x4724bd4189bd5eac}, - {0xb2977ee300c50fe7, 0x58edec91ec2cb657}, - {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed}, - {0x8b865b215899f46c, 0xbd79e0d20082ee74}, - {0xae67f1e9aec07187, 0xecd8590680a3aa11}, - {0xda01ee641a708de9, 0xe80e6f4820cc9495}, - {0x884134fe908658b2, 0x3109058d147fdcdd}, - {0xaa51823e34a7eede, 0xbd4b46f0599fd415}, - {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a}, - {0x850fadc09923329e, 0x03e2cf6bc604ddb0}, - {0xa6539930bf6bff45, 0x84db8346b786151c}, - {0xcfe87f7cef46ff16, 0xe612641865679a63}, - {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e}, - {0xa26da3999aef7749, 0xe3be5e330f38f09d}, - {0xcb090c8001ab551c, 0x5cadf5bfd3072cc5}, - {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6}, - {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa}, - {0xc646d63501a1511d, 0xb281e1fd541501b8}, - {0xf7d88bc24209a565, 0x1f225a7ca91a4226}, - {0x9ae757596946075f, 0x3375788de9b06958}, - {0xc1a12d2fc3978937, 0x0052d6b1641c83ae}, - {0xf209787bb47d6b84, 0xc0678c5dbd23a49a}, - {0x9745eb4d50ce6332, 0xf840b7ba963646e0}, - {0xbd176620a501fbff, 0xb650e5a93bc3d898}, - {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe}, - {0x93ba47c980e98cdf, 0xc66f336c36b10137}, - {0xb8a8d9bbe123f017, 0xb80b0047445d4184}, - {0xe6d3102ad96cec1d, 0xa60dc059157491e5}, - {0x9043ea1ac7e41392, 0x87c89837ad68db2f}, - {0xb454e4a179dd1877, 0x29babe4598c311fb}, - {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a}, - {0x8ce2529e2734bb1d, 0x1899e4a65f58660c}, - {0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f}, - {0xdc21a1171d42645d, 0x76707543f4fa1f73}, - {0x899504ae72497eba, 0x6a06494a791c53a8}, - {0xabfa45da0edbde69, 0x0487db9d17636892}, - {0xd6f8d7509292d603, 0x45a9d2845d3c42b6}, - {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, - {0xa7f26836f282b732, 0x8e6cac7768d7141e}, - {0xd1ef0244af2364ff, 
0x3207d795430cd926}, - {0x8335616aed761f1f, 0x7f44e6bd49e807b8}, - {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6}, - {0xcd036837130890a1, 0x36dba887c37a8c0f}, - {0x802221226be55a64, 0xc2494954da2c9789}, - {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c}, - {0xc83553c5c8965d3d, 0x6f92829494e5acc7}, - {0xfa42a8b73abbf48c, 0xcb772339ba1f17f9}, - {0x9c69a97284b578d7, 0xff2a760414536efb}, - {0xc38413cf25e2d70d, 0xfef5138519684aba}, - {0xf46518c2ef5b8cd1, 0x7eb258665fc25d69}, - {0x98bf2f79d5993802, 0xef2f773ffbd97a61}, - {0xbeeefb584aff8603, 0xaafb550ffacfd8fa}, - {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38}, - {0x952ab45cfa97a0b2, 0xdd945a747bf26183}, - {0xba756174393d88df, 0x94f971119aeef9e4}, - {0xe912b9d1478ceb17, 0x7a37cd5601aab85d}, - {0x91abb422ccb812ee, 0xac62e055c10ab33a}, - {0xb616a12b7fe617aa, 0x577b986b314d6009}, - {0xe39c49765fdf9d94, 0xed5a7e85fda0b80b}, - {0x8e41ade9fbebc27d, 0x14588f13be847307}, - {0xb1d219647ae6b31c, 0x596eb2d8ae258fc8}, - {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb}, - {0x8aec23d680043bee, 0x25de7bb9480d5854}, - {0xada72ccc20054ae9, 0xaf561aa79a10ae6a}, - {0xd910f7ff28069da4, 0x1b2ba1518094da04}, - {0x87aa9aff79042286, 0x90fb44d2f05d0842}, - {0xa99541bf57452b28, 0x353a1607ac744a53}, - {0xd3fa922f2d1675f2, 0x42889b8997915ce8}, - {0x847c9b5d7c2e09b7, 0x69956135febada11}, - {0xa59bc234db398c25, 0x43fab9837e699095}, - {0xcf02b2c21207ef2e, 0x94f967e45e03f4bb}, - {0x8161afb94b44f57d, 0x1d1be0eebac278f5}, - {0xa1ba1ba79e1632dc, 0x6462d92a69731732}, - {0xca28a291859bbf93, 0x7d7b8f7503cfdcfe}, - {0xfcb2cb35e702af78, 0x5cda735244c3d43e}, - {0x9defbf01b061adab, 0x3a0888136afa64a7}, - {0xc56baec21c7a1916, 0x088aaa1845b8fdd0}, - {0xf6c69a72a3989f5b, 0x8aad549e57273d45}, - {0x9a3c2087a63f6399, 0x36ac54e2f678864b}, - {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd}, - {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5}, - {0x969eb7c47859e743, 0x9f644ae5a4b1b325}, - {0xbc4665b596706114, 0x873d5d9f0dde1fee}, - {0xeb57ff22fc0c7959, 0xa90cb506d155a7ea}, - {0x9316ff75dd87cbd8, 
0x09a7f12442d588f2}, - {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb2f}, - {0xe5d3ef282a242e81, 0x8f1668c8a86da5fa}, - {0x8fa475791a569d10, 0xf96e017d694487bc}, - {0xb38d92d760ec4455, 0x37c981dcc395a9ac}, - {0xe070f78d3927556a, 0x85bbe253f47b1417}, - {0x8c469ab843b89562, 0x93956d7478ccec8e}, - {0xaf58416654a6babb, 0x387ac8d1970027b2}, - {0xdb2e51bfe9d0696a, 0x06997b05fcc0319e}, - {0x88fcf317f22241e2, 0x441fece3bdf81f03}, - {0xab3c2fddeeaad25a, 0xd527e81cad7626c3}, - {0xd60b3bd56a5586f1, 0x8a71e223d8d3b074}, - {0x85c7056562757456, 0xf6872d5667844e49}, - {0xa738c6bebb12d16c, 0xb428f8ac016561db}, - {0xd106f86e69d785c7, 0xe13336d701beba52}, - {0x82a45b450226b39c, 0xecc0024661173473}, - {0xa34d721642b06084, 0x27f002d7f95d0190}, - {0xcc20ce9bd35c78a5, 0x31ec038df7b441f4}, - {0xff290242c83396ce, 0x7e67047175a15271}, - {0x9f79a169bd203e41, 0x0f0062c6e984d386}, - {0xc75809c42c684dd1, 0x52c07b78a3e60868}, - {0xf92e0c3537826145, 0xa7709a56ccdf8a82}, - {0x9bbcc7a142b17ccb, 0x88a66076400bb691}, - {0xc2abf989935ddbfe, 0x6acff893d00ea435}, - {0xf356f7ebf83552fe, 0x0583f6b8c4124d43}, - {0x98165af37b2153de, 0xc3727a337a8b704a}, - {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c}, - {0xeda2ee1c7064130c, 0x1162def06f79df73}, - {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8}, - {0xb9a74a0637ce2ee1, 0x6d953e2bd7173692}, - {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437}, - {0x910ab1d4db9914a0, 0x1d9c9892400a22a2}, - {0xb54d5e4a127f59c8, 0x2503beb6d00cab4b}, - {0xe2a0b5dc971f303a, 0x2e44ae64840fd61d}, - {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, - {0xb10d8e1456105dad, 0x7425a83e872c5f47}, - {0xdd50f1996b947518, 0xd12f124e28f77719}, - {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f}, - {0xace73cbfdc0bfb7b, 0x636cc64d1001550b}, - {0xd8210befd30efa5a, 0x3c47f7e05401aa4e}, - {0x8714a775e3e95c78, 0x65acfaec34810a71}, - {0xa8d9d1535ce3b396, 0x7f1839a741a14d0d}, - {0xd31045a8341ca07c, 0x1ede48111209a050}, - {0x83ea2b892091e44d, 0x934aed0aab460432}, - {0xa4e4b66b68b65d60, 0xf81da84d5617853f}, - {0xce1de40642e3f4b9, 
0x36251260ab9d668e}, - {0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019}, - {0xa1075a24e4421730, 0xb24cf65b8612f81f}, - {0xc94930ae1d529cfc, 0xdee033f26797b627}, - {0xfb9b7cd9a4a7443c, 0x169840ef017da3b1}, - {0x9d412e0806e88aa5, 0x8e1f289560ee864e}, - {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2}, - {0xf5b5d7ec8acb58a2, 0xae10af696774b1db}, - {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29}, - {0xbff610b0cc6edd3f, 0x17fd090a58d32af3}, - {0xeff394dcff8a948e, 0xddfc4b4cef07f5b0}, - {0x95f83d0a1fb69cd9, 0x4abdaf101564f98e}, - {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1}, - {0xea53df5fd18d5513, 0x84c86189216dc5ed}, - {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4}, - {0xb7118682dbb66a77, 0x3fbc8c33221dc2a1}, - {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, - {0x8f05b1163ba6832d, 0x29cb4d87f2a7400e}, - {0xb2c71d5bca9023f8, 0x743e20e9ef511012}, - {0xdf78e4b2bd342cf6, 0x914da9246b255416}, - {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e}, - {0xae9672aba3d0c320, 0xa184ac2473b529b1}, - {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e}, - {0x8865899617fb1871, 0x7e2fa67c7a658892}, - {0xaa7eebfb9df9de8d, 0xddbb901b98feeab7}, - {0xd51ea6fa85785631, 0x552a74227f3ea565}, - {0x8533285c936b35de, 0xd53a88958f87275f}, - {0xa67ff273b8460356, 0x8a892abaf368f137}, - {0xd01fef10a657842c, 0x2d2b7569b0432d85}, - {0x8213f56a67f6b29b, 0x9c3b29620e29fc73}, - {0xa298f2c501f45f42, 0x8349f3ba91b47b8f}, - {0xcb3f2f7642717713, 0x241c70a936219a73}, - {0xfe0efb53d30dd4d7, 0xed238cd383aa0110}, - {0x9ec95d1463e8a506, 0xf4363804324a40aa}, - {0xc67bb4597ce2ce48, 0xb143c6053edcd0d5}, - {0xf81aa16fdc1b81da, 0xdd94b7868e94050a}, - {0x9b10a4e5e9913128, 0xca7cf2b4191c8326}, - {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0}, - {0xf24a01a73cf2dccf, 0xbc633b39673c8cec}, - {0x976e41088617ca01, 0xd5be0503e085d813}, - {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18}, - {0xec9c459d51852ba2, 0xddf8e7d60ed1219e}, - {0x93e1ab8252f33b45, 0xcabb90e5c942b503}, - {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, - {0xe7109bfba19c0c9d, 0x0cc512670a783ad4}, - {0x906a617d450187e2, 
0x27fb2b80668b24c5}, - {0xb484f9dc9641e9da, 0xb1f9f660802dedf6}, - {0xe1a63853bbd26451, 0x5e7873f8a0396973}, - {0x8d07e33455637eb2, 0xdb0b487b6423e1e8}, - {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62}, - {0xdc5c5301c56b75f7, 0x7641a140cc7810fb}, - {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d}, - {0xac2820d9623bf429, 0x546345fa9fbdcd44}, - {0xd732290fbacaf133, 0xa97c177947ad4095}, - {0x867f59a9d4bed6c0, 0x49ed8eabcccc485d}, - {0xa81f301449ee8c70, 0x5c68f256bfff5a74}, - {0xd226fc195c6a2f8c, 0x73832eec6fff3111}, - {0x83585d8fd9c25db7, 0xc831fd53c5ff7eab}, - {0xa42e74f3d032f525, 0xba3e7ca8b77f5e55}, - {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb}, - {0x80444b5e7aa7cf85, 0x7980d163cf5b81b3}, - {0xa0555e361951c366, 0xd7e105bcc332621f}, - {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7}, - {0xfa856334878fc150, 0xb14f98f6f0feb951}, - {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3}, - {0xc3b8358109e84f07, 0x0a862f80ec4700c8}, - {0xf4a642e14c6262c8, 0xcd27bb612758c0fa}, - {0x98e7e9cccfbd7dbd, 0x8038d51cb897789c}, - {0xbf21e44003acdd2c, 0xe0470a63e6bd56c3}, - {0xeeea5d5004981478, 0x1858ccfce06cac74}, - {0x95527a5202df0ccb, 0x0f37801e0c43ebc8}, - {0xbaa718e68396cffd, 0xd30560258f54e6ba}, - {0xe950df20247c83fd, 0x47c6b82ef32a2069}, - {0x91d28b7416cdd27e, 0x4cdc331d57fa5441}, - {0xb6472e511c81471d, 0xe0133fe4adf8e952}, - {0xe3d8f9e563a198e5, 0x58180fddd97723a6}, - {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648}, - {0xb201833b35d63f73, 0x2cd2cc6551e513da}, - {0xde81e40a034bcf4f, 0xf8077f7ea65e58d1}, - {0x8b112e86420f6191, 0xfb04afaf27faf782}, - {0xadd57a27d29339f6, 0x79c5db9af1f9b563}, - {0xd94ad8b1c7380874, 0x18375281ae7822bc}, - {0x87cec76f1c830548, 0x8f2293910d0b15b5}, - {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb22}, - {0xd433179d9c8cb841, 0x5fa60692a46151eb}, - {0x849feec281d7f328, 0xdbc7c41ba6bcd333}, - {0xa5c7ea73224deff3, 0x12b9b522906c0800}, - {0xcf39e50feae16bef, 0xd768226b34870a00}, - {0x81842f29f2cce375, 0xe6a1158300d46640}, - {0xa1e53af46f801c53, 0x60495ae3c1097fd0}, - {0xca5e89b18b602368, 
0x385bb19cb14bdfc4}, - {0xfcf62c1dee382c42, 0x46729e03dd9ed7b5}, - {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d1}, - {0xc5a05277621be293, 0xc7098b7305241885}, - {0xf70867153aa2db38, 0xb8cbee4fc66d1ea7} -#else - {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, - {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, - {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, - {0x86a8d39ef77164bc, 0xae5dff9c02033198}, - {0xd98ddaee19068c76, 0x3badd624dd9b0958}, - {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, - {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, - {0xe55990879ddcaabd, 0xcc420a6a101d0516}, - {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, - {0x95a8637627989aad, 0xdde7001379a44aa9}, - {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, - {0xc350000000000000, 0x0000000000000000}, - {0x9dc5ada82b70b59d, 0xf020000000000000}, - {0xfee50b7025c36a08, 0x02f236d04753d5b4}, - {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, - {0xa6539930bf6bff45, 0x84db8346b786151c}, - {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, - {0xd910f7ff28069da4, 0x1b2ba1518094da04}, - {0xaf58416654a6babb, 0x387ac8d1970027b2}, - {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, - {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, - {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, - {0x95527a5202df0ccb, 0x0f37801e0c43ebc8} +constexpr const char basic_data::right_padding_shifts[]; #endif -}; - -#if !FMT_USE_FULL_CACHE_DRAGONBOX -template -const uint64_t basic_data::powers_of_5_64[] = { - 0x0000000000000001, 0x0000000000000005, 0x0000000000000019, - 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35, - 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1, - 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd, - 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9, - 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5, - 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631, - 0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed, - 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9}; - -template -const uint32_t 
basic_data::dragonbox_pow10_recovery_errors[] = { - 0x50001400, 0x54044100, 0x54014555, 0x55954415, 0x54115555, 0x00000001, - 0x50000000, 0x00104000, 0x54010004, 0x05004001, 0x55555544, 0x41545555, - 0x54040551, 0x15445545, 0x51555514, 0x10000015, 0x00101100, 0x01100015, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04450514, 0x45414110, - 0x55555145, 0x50544050, 0x15040155, 0x11054140, 0x50111514, 0x11451454, - 0x00400541, 0x00000000, 0x55555450, 0x10056551, 0x10054011, 0x55551014, - 0x69514555, 0x05151109, 0x00155555}; -#endif - -template -const char basic_data::foreground_color[] = "\x1b[38;2;"; -template -const char basic_data::background_color[] = "\x1b[48;2;"; -template const char basic_data::reset_color[] = "\x1b[0m"; -template const wchar_t basic_data::wreset_color[] = L"\x1b[0m"; -template const char basic_data::signs[] = {0, '-', '+', ' '}; -template -const char basic_data::left_padding_shifts[] = {31, 31, 0, 1, 0}; -template -const char basic_data::right_padding_shifts[] = {0, 31, 0, 1, 0}; template struct bits { static FMT_CONSTEXPR_DECL const int value = @@ -1197,6 +277,52 @@ inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; } // Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its // (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. inline fp get_cached_power(int min_exponent, int& pow10_exponent) { + // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. + // These are generated by support/compute-powers.py. 
+ static constexpr const uint64_t pow10_significands[] = { + 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, + 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, + 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, + 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, + 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, + 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, + 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, + 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, + 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, + 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, + 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, + 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, + 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, + 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, + 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, + 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, + 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, + 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, + 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, + 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, + 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, + 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, + 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, + 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, + 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, + 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, + 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, + 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, + 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, + }; + + // Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding + // to significands above. 
+ static constexpr const int16_t pow10_exponents[] = { + -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, + -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, + -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, + -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, + -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, + 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, + 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, + 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; + const int shift = 32; const auto significand = static_cast(data::log10_2_significand); int index = static_cast( @@ -1210,8 +336,7 @@ inline fp get_cached_power(int min_exponent, int& pow10_exponent) { const int dec_exp_step = 8; index = (index - first_dec_exp - 1) / dec_exp_step + 1; pow10_exponent = first_dec_exp + index * dec_exp_step; - return {data::grisu_pow10_significands[index], - data::grisu_pow10_exponents[index]}; + return {pow10_significands[index], pow10_exponents[index]}; } // A simple accumulator to hold the sums of terms in bigint::square if uint128_t @@ -1228,7 +353,7 @@ struct accumulator { if (lower < n) ++upper; } void operator>>=(int shift) { - assert(shift == 32); + FMT_ASSERT(shift == 32, ""); (void)shift; lower = (upper << 32) | (lower >> 32); upper >>= 32; @@ -1307,7 +432,7 @@ class bigint { public: bigint() : exp_(0) {} explicit bigint(uint64_t n) { assign(n); } - ~bigint() { assert(bigits_.capacity() <= bigits_capacity); } + ~bigint() { FMT_ASSERT(bigits_.capacity() <= bigits_capacity, ""); } bigint(const bigint&) = delete; void operator=(const bigint&) = delete; @@ -1333,7 +458,7 @@ class bigint { int num_bigits() const { return static_cast(bigits_.size()) + exp_; } FMT_NOINLINE bigint& operator<<=(int shift) { - assert(shift >= 0); + FMT_ASSERT(shift >= 0, ""); exp_ += shift / bigit_bits; shift %= bigit_bits; if (shift == 0) return *this; @@ -1395,7 +520,7 @@ class bigint { // Assigns 
pow(10, exp) to this bigint. void assign_pow10(int exp) { - assert(exp >= 0); + FMT_ASSERT(exp >= 0, ""); if (exp == 0) return assign(1); // Find the top bit. int bitmask = 1; @@ -1414,9 +539,9 @@ class bigint { } void square() { - basic_memory_buffer n(std::move(bigits_)); int num_bigits = static_cast(bigits_.size()); int num_result_bigits = 2 * num_bigits; + basic_memory_buffer n(std::move(bigits_)); bigits_.resize(to_unsigned(num_result_bigits)); using accumulator_t = conditional_t; auto sum = accumulator_t(); @@ -1502,12 +627,19 @@ enum result { }; } +inline uint64_t power_of_10_64(int exp) { + static constexpr const uint64_t data[] = {1, FMT_POWERS_OF_10(1), + FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; + return data[exp]; +} + // Generates output using the Grisu digit-gen algorithm. // error: the size of the region (lower, upper) outside of which numbers // definitely do not round to value (Delta in Grisu3). template -FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, - int& exp, Handler& handler) { +FMT_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, int& exp, + Handler& handler) { const fp one(1ULL << -value.e, value.e); // The integral part of scaled value (p1 in Grisu) = value / one. It cannot be // zero because it contains a product of two 64-bit numbers with MSB set (due @@ -1519,7 +651,7 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, uint64_t fractional = value.f & (one.f - 1); exp = count_digits(integral); // kappa in Grisu. // Divide by 10 to prevent overflow. - auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e, + auto result = handler.on_start(power_of_10_64(exp - 1) << -one.e, value.f / 10, error * 10, exp); if (result != digits::more) return result; // Generate digits for the integral part. This can produce up to 10 digits. 
@@ -1569,8 +701,8 @@ FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, --exp; auto remainder = (static_cast(integral) << -one.e) + fractional; result = handler.on_digit(static_cast('0' + digit), - data::powers_of_10_64[exp] << -one.e, remainder, - error, exp, true); + power_of_10_64(exp) << -one.e, remainder, error, + exp, true); if (result != digits::more) return result; } while (exp > 0); // Generate digits for the fractional part. @@ -1643,11 +775,58 @@ struct fixed_handler { } }; +// A 128-bit integer type used internally, +struct uint128_wrapper { + uint128_wrapper() = default; + +#if FMT_USE_INT128 + uint128_t internal_; + + constexpr uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT + : internal_{static_cast(low) | + (static_cast(high) << 64)} {} + + constexpr uint128_wrapper(uint128_t u) : internal_{u} {} + + constexpr uint64_t high() const FMT_NOEXCEPT { + return uint64_t(internal_ >> 64); + } + constexpr uint64_t low() const FMT_NOEXCEPT { return uint64_t(internal_); } + + uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { + internal_ += n; + return *this; + } +#else + uint64_t high_; + uint64_t low_; + + constexpr uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT + : high_{high}, + low_{low} {} + + constexpr uint64_t high() const FMT_NOEXCEPT { return high_; } + constexpr uint64_t low() const FMT_NOEXCEPT { return low_; } + + uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { +# if defined(_MSC_VER) && defined(_M_X64) + unsigned char carry = _addcarry_u64(0, low_, n, &low_); + _addcarry_u64(carry, high_, 0, &high_); + return *this; +# else + uint64_t sum = low_ + n; + high_ += (sum < low_ ? 1 : 0); + low_ = sum; + return *this; +# endif + } +#endif +}; + // Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. namespace dragonbox { // Computes 128-bit result of multiplication of two 64-bit unsigned integers. 
-FMT_SAFEBUFFERS inline uint128_wrapper umul128(uint64_t x, - uint64_t y) FMT_NOEXCEPT { +inline uint128_wrapper umul128(uint64_t x, uint64_t y) FMT_NOEXCEPT { #if FMT_USE_INT128 return static_cast(x) * static_cast(y); #elif defined(_MSC_VER) && defined(_M_X64) @@ -1675,8 +854,7 @@ FMT_SAFEBUFFERS inline uint128_wrapper umul128(uint64_t x, } // Computes upper 64 bits of multiplication of two 64-bit unsigned integers. -FMT_SAFEBUFFERS inline uint64_t umul128_upper64(uint64_t x, - uint64_t y) FMT_NOEXCEPT { +inline uint64_t umul128_upper64(uint64_t x, uint64_t y) FMT_NOEXCEPT { #if FMT_USE_INT128 auto p = static_cast(x) * static_cast(y); return static_cast(p >> 64); @@ -1689,8 +867,7 @@ FMT_SAFEBUFFERS inline uint64_t umul128_upper64(uint64_t x, // Computes upper 64 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -FMT_SAFEBUFFERS inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y) - FMT_NOEXCEPT { +inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y) FMT_NOEXCEPT { uint128_wrapper g0 = umul128(x, y.high()); g0 += umul128_upper64(x, y.low()); return g0.high(); @@ -1704,8 +881,7 @@ inline uint32_t umul96_upper32(uint32_t x, uint64_t y) FMT_NOEXCEPT { // Computes middle 64 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. -FMT_SAFEBUFFERS inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y) - FMT_NOEXCEPT { +inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y) FMT_NOEXCEPT { uint64_t g01 = x * y.high(); uint64_t g10 = umul128_upper64(x, y.low()); return g01 + g10; @@ -1768,16 +944,52 @@ inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT { #endif } +// Table entry type for divisibility test. +template struct divtest_table_entry { + T mod_inv; + T max_quotient; +}; + // Returns true iff x is divisible by pow(5, exp). 
inline bool divisible_by_power_of_5(uint32_t x, int exp) FMT_NOEXCEPT { FMT_ASSERT(exp <= 10, "too large exponent"); - return x * data::divtest_table_for_pow5_32[exp].mod_inv <= - data::divtest_table_for_pow5_32[exp].max_quotient; + static constexpr const divtest_table_entry divtest_table[] = { + {0x00000001, 0xffffffff}, {0xcccccccd, 0x33333333}, + {0xc28f5c29, 0x0a3d70a3}, {0x26e978d5, 0x020c49ba}, + {0x3afb7e91, 0x0068db8b}, {0x0bcbe61d, 0x0014f8b5}, + {0x68c26139, 0x000431bd}, {0xae8d46a5, 0x0000d6bf}, + {0x22e90e21, 0x00002af3}, {0x3a2e9c6d, 0x00000897}, + {0x3ed61f49, 0x000001b7}}; + return x * divtest_table[exp].mod_inv <= divtest_table[exp].max_quotient; } inline bool divisible_by_power_of_5(uint64_t x, int exp) FMT_NOEXCEPT { FMT_ASSERT(exp <= 23, "too large exponent"); - return x * data::divtest_table_for_pow5_64[exp].mod_inv <= - data::divtest_table_for_pow5_64[exp].max_quotient; + static constexpr const divtest_table_entry divtest_table[] = { + {0x0000000000000001, 0xffffffffffffffff}, + {0xcccccccccccccccd, 0x3333333333333333}, + {0x8f5c28f5c28f5c29, 0x0a3d70a3d70a3d70}, + {0x1cac083126e978d5, 0x020c49ba5e353f7c}, + {0xd288ce703afb7e91, 0x0068db8bac710cb2}, + {0x5d4e8fb00bcbe61d, 0x0014f8b588e368f0}, + {0x790fb65668c26139, 0x000431bde82d7b63}, + {0xe5032477ae8d46a5, 0x0000d6bf94d5e57a}, + {0xc767074b22e90e21, 0x00002af31dc46118}, + {0x8e47ce423a2e9c6d, 0x0000089705f4136b}, + {0x4fa7f60d3ed61f49, 0x000001b7cdfd9d7b}, + {0x0fee64690c913975, 0x00000057f5ff85e5}, + {0x3662e0e1cf503eb1, 0x000000119799812d}, + {0xa47a2cf9f6433fbd, 0x0000000384b84d09}, + {0x54186f653140a659, 0x00000000b424dc35}, + {0x7738164770402145, 0x0000000024075f3d}, + {0xe4a4d1417cd9a041, 0x000000000734aca5}, + {0xc75429d9e5c5200d, 0x000000000170ef54}, + {0xc1773b91fac10669, 0x000000000049c977}, + {0x26b172506559ce15, 0x00000000000ec1e4}, + {0xd489e3a9addec2d1, 0x000000000002f394}, + {0x90e860bb892c8d5d, 0x000000000000971d}, + {0x502e79bf1b6f4f79, 0x0000000000001e39}, + 
{0xdcd618596be30fe5, 0x000000000000060b}}; + return x * divtest_table[exp].mod_inv <= divtest_table[exp].max_quotient; } // Replaces n by floor(n / pow(5, N)) returning true if and only if n is @@ -1831,7 +1043,34 @@ template <> struct cache_accessor { static uint64_t get_cached_power(int k) FMT_NOEXCEPT { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); - return data::dragonbox_pow10_significands_64[k - float_info::min_k]; + constexpr const uint64_t pow10_significands[] = { + 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, + 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, + 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, + 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, + 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, + 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810, + 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, + 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd, + 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, + 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, + 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000, + 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, + 0xc350000000000000, 0xf424000000000000, 0x9896800000000000, + 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, + 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000, + 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000, + 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, + 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000, + 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, + 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940984, + 0xa18f07d736b90be5, 0xc9f2c9cd04674ede, 0xfc6f7c4045812296, + 0x9dc5ada82b70b59d, 0xc5371912364ce305, 0xf684df56c3e01bc6, + 0x9a130b963a6c115c, 0xc097ce7bc90715b3, 0xf0bdc21abb48db20, + 0x96769950b50d88f4, 0xbc143fa4e250eb31, 
0xeb194f8e1ae525fd, + 0x92efd1b8d0cf37be, 0xb7abc627050305ad, 0xe596b7b0c643c719, + 0x8f7e32ce7bea5c6f, 0xb35dbf821ae4f38b, 0xe0352f62a19e306e}; + return pow10_significands[k - float_info::min_k]; } static carrier_uint compute_mul(carrier_uint u, @@ -1885,10 +1124,679 @@ template <> struct cache_accessor { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); + static constexpr const uint128_wrapper pow10_significands[] = { #if FMT_USE_FULL_CACHE_DRAGONBOX - return data::dragonbox_pow10_significands_128[k - - float_info::min_k]; + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0x9faacf3df73609b1, 0x77b191618c54e9ad}, + {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, + {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, + {0x9becce62836ac577, 0x4ee367f9430aec33}, + {0xc2e801fb244576d5, 0x229c41f793cda740}, + {0xf3a20279ed56d48a, 0x6b43527578c11110}, + {0x9845418c345644d6, 0x830a13896b78aaaa}, + {0xbe5691ef416bd60c, 0x23cc986bc656d554}, + {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, + {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, + {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, + {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, + {0x91376c36d99995be, 0x23100809b9c21fa2}, + {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, + {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, + {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, + {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, + {0xdd95317f31c7fa1d, 0x40405643d711d584}, + {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, + {0xad1c8eab5ee43b66, 0xda3243650005eed0}, + {0xd863b256369d4a40, 0x90bed43e40076a83}, + {0x873e4f75e2224e68, 0x5a7744a6e804a292}, + {0xa90de3535aaae202, 0x711515d0a205cb37}, + {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, + {0x8412d9991ed58091, 0xe858790afe9486c3}, + {0xa5178fff668ae0b6, 0x626e974dbe39a873}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, + {0xa139029f6a239f72, 0x1c1fffc1ebc44e81}, + {0xc987434744ac874e, 0xa327ffb266b56221}, + {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, + {0x9d71ac8fada6c9b5, 
0x6f773fc3603db4aa}, + {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, + {0xf6019da07f549b2b, 0x7e2a53a146606a49}, + {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, + {0xc0314325637a1939, 0xfa911155fefb5309}, + {0xf03d93eebc589f88, 0x793555ab7eba27cb}, + {0x96267c7535b763b5, 0x4bc1558b2f3458df}, + {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, + {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, + {0x92a1958a7675175f, 0x0bfacd89ec191eca}, + {0xb749faed14125d36, 0xcef980ec671f667c}, + {0xe51c79a85916f484, 0x82b7e12780e7401b}, + {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, + {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, + {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, + {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1}, + {0xaecc49914078536d, 0x58fae9f773886e19}, + {0xda7f5bf590966848, 0xaf39a475506a899f}, + {0x888f99797a5e012d, 0x6d8406c952429604}, + {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, + {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, + {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0xd0601d8efc57b08b, 0xf13b94daf124da27}, + {0x823c12795db6ce57, 0x76c53d08d6b70859}, + {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, + {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, + {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, + {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, + {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, + {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, + {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, + {0xc21094364dfb5636, 0x985915fc12f542e5}, + {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, + {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, + {0xbd8430bd08277231, 0x50c6ff782a838354}, + {0xece53cec4a314ebd, 0xa4f8bf5635246429}, + {0x940f4613ae5ed136, 0x871b7795e136be9a}, + {0xb913179899f68584, 0x28e2557b59846e40}, + {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, + {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, + {0xb4bca50b065abe63, 0x0fed077a756b53aa}, + {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, + {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, + {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, + {0xdca04777f541c567, 
0xecf0d7a0fc5583a1}, + {0x89e42caaf9491b60, 0xf41686c49db57245}, + {0xac5d37d5b79b6239, 0x311c2875c522ced6}, + {0xd77485cb25823ac7, 0x7d633293366b828c}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, + {0xd267caa862a12d66, 0xd072df63c324fd7c}, + {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, + {0xa46116538d0deb78, 0x52d9be85f074e609}, + {0xcd795be870516656, 0x67902e276c921f8c}, + {0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, + {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, + {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, + {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, + {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, + {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, + {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, + {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, + {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, + {0xef340a98172aace4, 0x86fb897116c87c35}, + {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, + {0xbae0a846d2195712, 0x8974836059cca10a}, + {0xe998d258869facd7, 0x2bd1a438703fc94c}, + {0x91ff83775423cc06, 0x7b6306a34627ddd0}, + {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, + {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, + {0x8e938662882af53e, 0x547eb47b7282ee9d}, + {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, + {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, + {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, + {0xae0b158b4738705e, 0x9624ab50b148d446}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, + {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, + {0xd47487cc8470652b, 0x7647c32000696720}, + {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, + {0xa5fb0a17c777cf09, 0xf468107100525891}, + {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, + {0x81ac1fe293d599bf, 0xc6f14cd848405531}, + {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, + {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, + {0xfd442e4688bd304a, 0x908f4a166d1da664}, + {0x9e4a9cec15763e2e, 0x9a598e4e043287ff}, + {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, + {0xf7549530e188c128, 0xd12bee59e68ef47d}, + {0x9a94dd3e8cf578b9, 
0x82bb74f8301958cf}, + {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, + {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, + {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, + {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, + {0xebdf661791d60f56, 0x111b495b3464ad22}, + {0x936b9fcebb25c995, 0xcab10dd900beec35}, + {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, + {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, + {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, + {0xb3f4e093db73a093, 0x59ed216765690f57}, + {0xe0f218b8d25088b8, 0x306869c13ec3532d}, + {0x8c974f7383725573, 0x1e414218c73a13fc}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, + {0x894bc396ce5da772, 0x6b8bba8c328eb784}, + {0xab9eb47c81f5114f, 0x066ea92f3f326565}, + {0xd686619ba27255a2, 0xc80a537b0efefebe}, + {0x8613fd0145877585, 0xbd06742ce95f5f37}, + {0xa798fc4196e952e7, 0x2c48113823b73705}, + {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, + {0x82ef85133de648c4, 0x9a984d73dbe722fc}, + {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, + {0xcc963fee10b7d1b3, 0x318df905079926a9}, + {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, + {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, + {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, + {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, + {0x9c1661a651213e2d, 0x06bea10ca65c084f}, + {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, + {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, + {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, + {0xbe89523386091465, 0xf6bbb397f1135824}, + {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, + {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, + {0xba121a4650e4ddeb, 0x92f34d62616ce414}, + {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, + {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, + {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, + {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, + {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, + {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, + {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, + {0xd89d64d57a607744, 
0xe871c7bf077ba8b8}, + {0x87625f056c7c4a8b, 0x11471cd764ad4973}, + {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, + {0xd389b47879823479, 0x4aff1d108d4ec2c4}, + {0x843610cb4bf160cb, 0xcedf722a585139bb}, + {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, + {0xce947a3da6a9273e, 0x733d226229feea33}, + {0x811ccc668829b887, 0x0806357d5a3f5260}, + {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, + {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, + {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, + {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, + {0xc5029163f384a931, 0x0a9e795e65d4df12}, + {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, + {0x99ea0196163fa42e, 0x504bced1bf8e4e46}, + {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, + {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, + {0x964e858c91ba2655, 0x3a6a07f8d510f870}, + {0xbbe226efb628afea, 0x890489f70a55368c}, + {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, + {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, + {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, + {0xb32df8e9f3546564, 0x47939822dc96abfa}, + {0xdff9772470297ebd, 0x59787e2b93bc56f8}, + {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, + {0xaefae51477a06b03, 0xede622920b6b23f2}, + {0xdab99e59958885c4, 0xe95fab368e45ecee}, + {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, + {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, + {0xd59944a37c0752a2, 0x4be76d3346f04960}, + {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, + {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, + {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, + {0x825ecc24c873782f, 0x8ed400668c0c28c9}, + {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, + {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, + {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, + {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, + {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, + {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, + {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb}, + {0xc24452da229b021b, 0xfbe85badce996169}, + {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, + {0x97c560ba6b0919a5, 
0xdccd879fc967d41b}, + {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, + {0xed246723473e3813, 0x290123e9aab23b69}, + {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, + {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, + {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, + {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, + {0x8d590723948a535f, 0x579c487e5a38ad0f}, + {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, + {0xdcdb1b2798182244, 0xf8e431456cf88e66}, + {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, + {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, + {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, + {0x86ccbb52ea94baea, 0x98e947129fc2b4ea}, + {0xa87fea27a539e9a5, 0x3f2398d747b36225}, + {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, + {0x83a3eeeef9153e89, 0x1953cf68300424ad}, + {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, + {0xcdb02555653131b6, 0x3792f412cb06794e}, + {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, + {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, + {0xc8de047564d20a8b, 0xf245825a5a445276}, + {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, + {0x9ced737bb6c4183d, 0x55464dd69685606c}, + {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, + {0xf53304714d9265df, 0xd53dd99f4b3066a9}, + {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, + {0xbf8fdb78849a5f96, 0xde98520472bdd034}, + {0xef73d256a5c0f77c, 0x963e66858f6d4441}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xbb127c53b17ec159, 0x5560c018580d5d53}, + {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, + {0x9226712162ab070d, 0xcab3961304ca70e9}, + {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, + {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, + {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, + {0xb267ed1940f1c61c, 0x55f038b237591ed4}, + {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, + {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, + {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, + {0xd9c7dced53c72255, 0x96e7bd358c904a22}, + {0x881cea14545c7575, 0x7e50d64177da2e55}, + {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, + {0xd4ad2dbfc3d07787, 
0x955e4ec64b44e865}, + {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, + {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, + {0xcfb11ead453994ba, 0x67de18eda5814af3}, + {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, + {0xa2425ff75e14fc31, 0xa1258379a94d028e}, + {0xcad2f7f5359a3b3e, 0x096ee45813a04331}, + {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, + {0x9e74d1b791e07e48, 0x775ea264cf55347e}, + {0xc612062576589dda, 0x95364afe032a819e}, + {0xf79687aed3eec551, 0x3a83ddbd83f52205}, + {0x9abe14cd44753b52, 0xc4926a9672793543}, + {0xc16d9a0095928a27, 0x75b7053c0f178294}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, + {0xbce5086492111aea, 0x88f4bb1ca6bcf585}, + {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, + {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, + {0xb877aa3236a4b449, 0x09befeb9fad487c3}, + {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, + {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, + {0xb424dc35095cd80f, 0x538484c19ef38c95}, + {0xe12e13424bb40e13, 0x2865a5f206b06fba}, + {0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, + {0xafebff0bcb24aafe, 0xf78f69a51539d749}, + {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, + {0x89705f4136b4a597, 0x31680a88f8953031}, + {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, + {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, + {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, + {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, + {0xd1b71758e219652b, 0xd3c36113404ea4a9}, + {0x83126e978d4fdf3b, 0x645a1cac083126ea}, + {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, + {0xcccccccccccccccc, 0xcccccccccccccccd}, + {0x8000000000000000, 0x0000000000000000}, + {0xa000000000000000, 0x0000000000000000}, + {0xc800000000000000, 0x0000000000000000}, + {0xfa00000000000000, 0x0000000000000000}, + {0x9c40000000000000, 0x0000000000000000}, + {0xc350000000000000, 0x0000000000000000}, + {0xf424000000000000, 0x0000000000000000}, + {0x9896800000000000, 0x0000000000000000}, + {0xbebc200000000000, 0x0000000000000000}, + {0xee6b280000000000, 0x0000000000000000}, + {0x9502f90000000000, 
0x0000000000000000}, + {0xba43b74000000000, 0x0000000000000000}, + {0xe8d4a51000000000, 0x0000000000000000}, + {0x9184e72a00000000, 0x0000000000000000}, + {0xb5e620f480000000, 0x0000000000000000}, + {0xe35fa931a0000000, 0x0000000000000000}, + {0x8e1bc9bf04000000, 0x0000000000000000}, + {0xb1a2bc2ec5000000, 0x0000000000000000}, + {0xde0b6b3a76400000, 0x0000000000000000}, + {0x8ac7230489e80000, 0x0000000000000000}, + {0xad78ebc5ac620000, 0x0000000000000000}, + {0xd8d726b7177a8000, 0x0000000000000000}, + {0x878678326eac9000, 0x0000000000000000}, + {0xa968163f0a57b400, 0x0000000000000000}, + {0xd3c21bcecceda100, 0x0000000000000000}, + {0x84595161401484a0, 0x0000000000000000}, + {0xa56fa5b99019a5c8, 0x0000000000000000}, + {0xcecb8f27f4200f3a, 0x0000000000000000}, + {0x813f3978f8940984, 0x4000000000000000}, + {0xa18f07d736b90be5, 0x5000000000000000}, + {0xc9f2c9cd04674ede, 0xa400000000000000}, + {0xfc6f7c4045812296, 0x4d00000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xc5371912364ce305, 0x6c28000000000000}, + {0xf684df56c3e01bc6, 0xc732000000000000}, + {0x9a130b963a6c115c, 0x3c7f400000000000}, + {0xc097ce7bc90715b3, 0x4b9f100000000000}, + {0xf0bdc21abb48db20, 0x1e86d40000000000}, + {0x96769950b50d88f4, 0x1314448000000000}, + {0xbc143fa4e250eb31, 0x17d955a000000000}, + {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, + {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, + {0xb7abc627050305ad, 0xf14a3d9e40000000}, + {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, + {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, + {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, + {0xe0352f62a19e306e, 0xd50b2037ad200000}, + {0x8c213d9da502de45, 0x4526f422cc340000}, + {0xaf298d050e4395d6, 0x9670b12b7f410000}, + {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, + {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, + {0xab0e93b6efee0053, 0x8eea0d047a457a00}, + {0xd5d238a4abe98068, 0x72a4904598d6d880}, + {0x85a36366eb71f041, 0x47a6da2b7f864750}, + {0xa70c3c40a64e6c51, 0x999090b65f67d924}, + {0xd0cf4b50cfe20765, 
0xfff4b4e3f741cf6d}, + {0x82818f1281ed449f, 0xbff8f10e7a8921a4}, + {0xa321f2d7226895c7, 0xaff72d52192b6a0d}, + {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490}, + {0xfee50b7025c36a08, 0x02f236d04753d5b4}, + {0x9f4f2726179a2245, 0x01d762422c946590}, + {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5}, + {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2}, + {0x9b934c3b330c8577, 0x63cc55f49f88eb2f}, + {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb}, + {0xf316271c7fc3908a, 0x8bef464e3945ef7a}, + {0x97edd871cfda3a56, 0x97758bf0e3cbb5ac}, + {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317}, + {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd}, + {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a}, + {0xb975d6b6ee39e436, 0xb3e2fd538e122b44}, + {0xe7d34c64a9c85d44, 0x60dbbca87196b616}, + {0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd}, + {0xb51d13aea4a488dd, 0x6babab6398bdbe41}, + {0xe264589a4dcdab14, 0xc696963c7eed2dd1}, + {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2}, + {0xb0de65388cc8ada8, 0x3b25a55f43294bcb}, + {0xdd15fe86affad912, 0x49ef0eb713f39ebe}, + {0x8a2dbf142dfcc7ab, 0x6e3569326c784337}, + {0xacb92ed9397bf996, 0x49c2c37f07965404}, + {0xd7e77a8f87daf7fb, 0xdc33745ec97be906}, + {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3}, + {0xa8acd7c0222311bc, 0xc40832ea0d68ce0c}, + {0xd2d80db02aabd62b, 0xf50a3fa490c30190}, + {0x83c7088e1aab65db, 0x792667c6da79e0fa}, + {0xa4b8cab1a1563f52, 0x577001b891185938}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, + {0x80b05e5ac60b6178, 0x544f8158315b05b4}, + {0xa0dc75f1778e39d6, 0x696361ae3db1c721}, + {0xc913936dd571c84c, 0x03bc3a19cd1e38e9}, + {0xfb5878494ace3a5f, 0x04ab48a04065c723}, + {0x9d174b2dcec0e47b, 0x62eb0d64283f9c76}, + {0xc45d1df942711d9a, 0x3ba5d0bd324f8394}, + {0xf5746577930d6500, 0xca8f44ec7ee36479}, + {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb}, + {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e}, + {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e}, + {0x95d04aee3b80ece5, 0xbba1f1d158724a12}, + {0xbb445da9ca61281f, 0x2a8a6e45ae8edc97}, + {0xea1575143cf97226, 0xf52d09d71a3293bd}, + {0x924d692ca61be758, 
0x593c2626705f9c56}, + {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c}, + {0xe498f455c38b997a, 0x0b6dfb9c0f956447}, + {0x8edf98b59a373fec, 0x4724bd4189bd5eac}, + {0xb2977ee300c50fe7, 0x58edec91ec2cb657}, + {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed}, + {0x8b865b215899f46c, 0xbd79e0d20082ee74}, + {0xae67f1e9aec07187, 0xecd8590680a3aa11}, + {0xda01ee641a708de9, 0xe80e6f4820cc9495}, + {0x884134fe908658b2, 0x3109058d147fdcdd}, + {0xaa51823e34a7eede, 0xbd4b46f0599fd415}, + {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a}, + {0x850fadc09923329e, 0x03e2cf6bc604ddb0}, + {0xa6539930bf6bff45, 0x84db8346b786151c}, + {0xcfe87f7cef46ff16, 0xe612641865679a63}, + {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e}, + {0xa26da3999aef7749, 0xe3be5e330f38f09d}, + {0xcb090c8001ab551c, 0x5cadf5bfd3072cc5}, + {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6}, + {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa}, + {0xc646d63501a1511d, 0xb281e1fd541501b8}, + {0xf7d88bc24209a565, 0x1f225a7ca91a4226}, + {0x9ae757596946075f, 0x3375788de9b06958}, + {0xc1a12d2fc3978937, 0x0052d6b1641c83ae}, + {0xf209787bb47d6b84, 0xc0678c5dbd23a49a}, + {0x9745eb4d50ce6332, 0xf840b7ba963646e0}, + {0xbd176620a501fbff, 0xb650e5a93bc3d898}, + {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe}, + {0x93ba47c980e98cdf, 0xc66f336c36b10137}, + {0xb8a8d9bbe123f017, 0xb80b0047445d4184}, + {0xe6d3102ad96cec1d, 0xa60dc059157491e5}, + {0x9043ea1ac7e41392, 0x87c89837ad68db2f}, + {0xb454e4a179dd1877, 0x29babe4598c311fb}, + {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a}, + {0x8ce2529e2734bb1d, 0x1899e4a65f58660c}, + {0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f}, + {0xdc21a1171d42645d, 0x76707543f4fa1f73}, + {0x899504ae72497eba, 0x6a06494a791c53a8}, + {0xabfa45da0edbde69, 0x0487db9d17636892}, + {0xd6f8d7509292d603, 0x45a9d2845d3c42b6}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, + {0xa7f26836f282b732, 0x8e6cac7768d7141e}, + {0xd1ef0244af2364ff, 0x3207d795430cd926}, + {0x8335616aed761f1f, 0x7f44e6bd49e807b8}, + {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6}, + {0xcd036837130890a1, 
0x36dba887c37a8c0f}, + {0x802221226be55a64, 0xc2494954da2c9789}, + {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c}, + {0xc83553c5c8965d3d, 0x6f92829494e5acc7}, + {0xfa42a8b73abbf48c, 0xcb772339ba1f17f9}, + {0x9c69a97284b578d7, 0xff2a760414536efb}, + {0xc38413cf25e2d70d, 0xfef5138519684aba}, + {0xf46518c2ef5b8cd1, 0x7eb258665fc25d69}, + {0x98bf2f79d5993802, 0xef2f773ffbd97a61}, + {0xbeeefb584aff8603, 0xaafb550ffacfd8fa}, + {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38}, + {0x952ab45cfa97a0b2, 0xdd945a747bf26183}, + {0xba756174393d88df, 0x94f971119aeef9e4}, + {0xe912b9d1478ceb17, 0x7a37cd5601aab85d}, + {0x91abb422ccb812ee, 0xac62e055c10ab33a}, + {0xb616a12b7fe617aa, 0x577b986b314d6009}, + {0xe39c49765fdf9d94, 0xed5a7e85fda0b80b}, + {0x8e41ade9fbebc27d, 0x14588f13be847307}, + {0xb1d219647ae6b31c, 0x596eb2d8ae258fc8}, + {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb}, + {0x8aec23d680043bee, 0x25de7bb9480d5854}, + {0xada72ccc20054ae9, 0xaf561aa79a10ae6a}, + {0xd910f7ff28069da4, 0x1b2ba1518094da04}, + {0x87aa9aff79042286, 0x90fb44d2f05d0842}, + {0xa99541bf57452b28, 0x353a1607ac744a53}, + {0xd3fa922f2d1675f2, 0x42889b8997915ce8}, + {0x847c9b5d7c2e09b7, 0x69956135febada11}, + {0xa59bc234db398c25, 0x43fab9837e699095}, + {0xcf02b2c21207ef2e, 0x94f967e45e03f4bb}, + {0x8161afb94b44f57d, 0x1d1be0eebac278f5}, + {0xa1ba1ba79e1632dc, 0x6462d92a69731732}, + {0xca28a291859bbf93, 0x7d7b8f7503cfdcfe}, + {0xfcb2cb35e702af78, 0x5cda735244c3d43e}, + {0x9defbf01b061adab, 0x3a0888136afa64a7}, + {0xc56baec21c7a1916, 0x088aaa1845b8fdd0}, + {0xf6c69a72a3989f5b, 0x8aad549e57273d45}, + {0x9a3c2087a63f6399, 0x36ac54e2f678864b}, + {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd}, + {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5}, + {0x969eb7c47859e743, 0x9f644ae5a4b1b325}, + {0xbc4665b596706114, 0x873d5d9f0dde1fee}, + {0xeb57ff22fc0c7959, 0xa90cb506d155a7ea}, + {0x9316ff75dd87cbd8, 0x09a7f12442d588f2}, + {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb2f}, + {0xe5d3ef282a242e81, 0x8f1668c8a86da5fa}, + {0x8fa475791a569d10, 
0xf96e017d694487bc}, + {0xb38d92d760ec4455, 0x37c981dcc395a9ac}, + {0xe070f78d3927556a, 0x85bbe253f47b1417}, + {0x8c469ab843b89562, 0x93956d7478ccec8e}, + {0xaf58416654a6babb, 0x387ac8d1970027b2}, + {0xdb2e51bfe9d0696a, 0x06997b05fcc0319e}, + {0x88fcf317f22241e2, 0x441fece3bdf81f03}, + {0xab3c2fddeeaad25a, 0xd527e81cad7626c3}, + {0xd60b3bd56a5586f1, 0x8a71e223d8d3b074}, + {0x85c7056562757456, 0xf6872d5667844e49}, + {0xa738c6bebb12d16c, 0xb428f8ac016561db}, + {0xd106f86e69d785c7, 0xe13336d701beba52}, + {0x82a45b450226b39c, 0xecc0024661173473}, + {0xa34d721642b06084, 0x27f002d7f95d0190}, + {0xcc20ce9bd35c78a5, 0x31ec038df7b441f4}, + {0xff290242c83396ce, 0x7e67047175a15271}, + {0x9f79a169bd203e41, 0x0f0062c6e984d386}, + {0xc75809c42c684dd1, 0x52c07b78a3e60868}, + {0xf92e0c3537826145, 0xa7709a56ccdf8a82}, + {0x9bbcc7a142b17ccb, 0x88a66076400bb691}, + {0xc2abf989935ddbfe, 0x6acff893d00ea435}, + {0xf356f7ebf83552fe, 0x0583f6b8c4124d43}, + {0x98165af37b2153de, 0xc3727a337a8b704a}, + {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c}, + {0xeda2ee1c7064130c, 0x1162def06f79df73}, + {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8}, + {0xb9a74a0637ce2ee1, 0x6d953e2bd7173692}, + {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437}, + {0x910ab1d4db9914a0, 0x1d9c9892400a22a2}, + {0xb54d5e4a127f59c8, 0x2503beb6d00cab4b}, + {0xe2a0b5dc971f303a, 0x2e44ae64840fd61d}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, + {0xb10d8e1456105dad, 0x7425a83e872c5f47}, + {0xdd50f1996b947518, 0xd12f124e28f77719}, + {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f}, + {0xace73cbfdc0bfb7b, 0x636cc64d1001550b}, + {0xd8210befd30efa5a, 0x3c47f7e05401aa4e}, + {0x8714a775e3e95c78, 0x65acfaec34810a71}, + {0xa8d9d1535ce3b396, 0x7f1839a741a14d0d}, + {0xd31045a8341ca07c, 0x1ede48111209a050}, + {0x83ea2b892091e44d, 0x934aed0aab460432}, + {0xa4e4b66b68b65d60, 0xf81da84d5617853f}, + {0xce1de40642e3f4b9, 0x36251260ab9d668e}, + {0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019}, + {0xa1075a24e4421730, 0xb24cf65b8612f81f}, + {0xc94930ae1d529cfc, 
0xdee033f26797b627}, + {0xfb9b7cd9a4a7443c, 0x169840ef017da3b1}, + {0x9d412e0806e88aa5, 0x8e1f289560ee864e}, + {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2}, + {0xf5b5d7ec8acb58a2, 0xae10af696774b1db}, + {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29}, + {0xbff610b0cc6edd3f, 0x17fd090a58d32af3}, + {0xeff394dcff8a948e, 0xddfc4b4cef07f5b0}, + {0x95f83d0a1fb69cd9, 0x4abdaf101564f98e}, + {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1}, + {0xea53df5fd18d5513, 0x84c86189216dc5ed}, + {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4}, + {0xb7118682dbb66a77, 0x3fbc8c33221dc2a1}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, + {0x8f05b1163ba6832d, 0x29cb4d87f2a7400e}, + {0xb2c71d5bca9023f8, 0x743e20e9ef511012}, + {0xdf78e4b2bd342cf6, 0x914da9246b255416}, + {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e}, + {0xae9672aba3d0c320, 0xa184ac2473b529b1}, + {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e}, + {0x8865899617fb1871, 0x7e2fa67c7a658892}, + {0xaa7eebfb9df9de8d, 0xddbb901b98feeab7}, + {0xd51ea6fa85785631, 0x552a74227f3ea565}, + {0x8533285c936b35de, 0xd53a88958f87275f}, + {0xa67ff273b8460356, 0x8a892abaf368f137}, + {0xd01fef10a657842c, 0x2d2b7569b0432d85}, + {0x8213f56a67f6b29b, 0x9c3b29620e29fc73}, + {0xa298f2c501f45f42, 0x8349f3ba91b47b8f}, + {0xcb3f2f7642717713, 0x241c70a936219a73}, + {0xfe0efb53d30dd4d7, 0xed238cd383aa0110}, + {0x9ec95d1463e8a506, 0xf4363804324a40aa}, + {0xc67bb4597ce2ce48, 0xb143c6053edcd0d5}, + {0xf81aa16fdc1b81da, 0xdd94b7868e94050a}, + {0x9b10a4e5e9913128, 0xca7cf2b4191c8326}, + {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0}, + {0xf24a01a73cf2dccf, 0xbc633b39673c8cec}, + {0x976e41088617ca01, 0xd5be0503e085d813}, + {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18}, + {0xec9c459d51852ba2, 0xddf8e7d60ed1219e}, + {0x93e1ab8252f33b45, 0xcabb90e5c942b503}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, + {0xe7109bfba19c0c9d, 0x0cc512670a783ad4}, + {0x906a617d450187e2, 0x27fb2b80668b24c5}, + {0xb484f9dc9641e9da, 0xb1f9f660802dedf6}, + {0xe1a63853bbd26451, 0x5e7873f8a0396973}, + {0x8d07e33455637eb2, 
0xdb0b487b6423e1e8}, + {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62}, + {0xdc5c5301c56b75f7, 0x7641a140cc7810fb}, + {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d}, + {0xac2820d9623bf429, 0x546345fa9fbdcd44}, + {0xd732290fbacaf133, 0xa97c177947ad4095}, + {0x867f59a9d4bed6c0, 0x49ed8eabcccc485d}, + {0xa81f301449ee8c70, 0x5c68f256bfff5a74}, + {0xd226fc195c6a2f8c, 0x73832eec6fff3111}, + {0x83585d8fd9c25db7, 0xc831fd53c5ff7eab}, + {0xa42e74f3d032f525, 0xba3e7ca8b77f5e55}, + {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb}, + {0x80444b5e7aa7cf85, 0x7980d163cf5b81b3}, + {0xa0555e361951c366, 0xd7e105bcc332621f}, + {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7}, + {0xfa856334878fc150, 0xb14f98f6f0feb951}, + {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3}, + {0xc3b8358109e84f07, 0x0a862f80ec4700c8}, + {0xf4a642e14c6262c8, 0xcd27bb612758c0fa}, + {0x98e7e9cccfbd7dbd, 0x8038d51cb897789c}, + {0xbf21e44003acdd2c, 0xe0470a63e6bd56c3}, + {0xeeea5d5004981478, 0x1858ccfce06cac74}, + {0x95527a5202df0ccb, 0x0f37801e0c43ebc8}, + {0xbaa718e68396cffd, 0xd30560258f54e6ba}, + {0xe950df20247c83fd, 0x47c6b82ef32a2069}, + {0x91d28b7416cdd27e, 0x4cdc331d57fa5441}, + {0xb6472e511c81471d, 0xe0133fe4adf8e952}, + {0xe3d8f9e563a198e5, 0x58180fddd97723a6}, + {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648}, + {0xb201833b35d63f73, 0x2cd2cc6551e513da}, + {0xde81e40a034bcf4f, 0xf8077f7ea65e58d1}, + {0x8b112e86420f6191, 0xfb04afaf27faf782}, + {0xadd57a27d29339f6, 0x79c5db9af1f9b563}, + {0xd94ad8b1c7380874, 0x18375281ae7822bc}, + {0x87cec76f1c830548, 0x8f2293910d0b15b5}, + {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb22}, + {0xd433179d9c8cb841, 0x5fa60692a46151eb}, + {0x849feec281d7f328, 0xdbc7c41ba6bcd333}, + {0xa5c7ea73224deff3, 0x12b9b522906c0800}, + {0xcf39e50feae16bef, 0xd768226b34870a00}, + {0x81842f29f2cce375, 0xe6a1158300d46640}, + {0xa1e53af46f801c53, 0x60495ae3c1097fd0}, + {0xca5e89b18b602368, 0x385bb19cb14bdfc4}, + {0xfcf62c1dee382c42, 0x46729e03dd9ed7b5}, + {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d1}, + {0xc5a05277621be293, 
0xc7098b7305241885}, + { 0xf70867153aa2db38, + 0xb8cbee4fc66d1ea7 } #else + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0xc350000000000000, 0x0000000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xfee50b7025c36a08, 0x02f236d04753d5b4}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86}, + {0xa6539930bf6bff45, 0x84db8346b786151c}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2}, + {0xd910f7ff28069da4, 0x1b2ba1518094da04}, + {0xaf58416654a6babb, 0x387ac8d1970027b2}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d2}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334a}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936243}, + { 0x95527a5202df0ccb, + 0x0f37801e0c43ebc8 } +#endif + }; + +#if FMT_USE_FULL_CACHE_DRAGONBOX + return pow10_significands[k - float_info::min_k]; +#else + static constexpr const uint64_t powers_of_5_64[] = { + 0x0000000000000001, 0x0000000000000005, 0x0000000000000019, + 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35, + 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1, + 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd, + 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9, + 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5, + 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631, + 0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed, + 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9}; + + static constexpr const uint32_t pow10_recovery_errors[] = { + 0x50001400, 0x54044100, 0x54014555, 0x55954415, 0x54115555, 0x00000001, + 0x50000000, 0x00104000, 0x54010004, 0x05004001, 
0x55555544, 0x41545555, + 0x54040551, 0x15445545, 0x51555514, 0x10000015, 0x00101100, 0x01100015, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04450514, 0x45414110, + 0x55555145, 0x50544050, 0x15040155, 0x11054140, 0x50111514, 0x11451454, + 0x00400541, 0x00000000, 0x55555450, 0x10056551, 0x10054011, 0x55551014, + 0x69514555, 0x05151109, 0x00155555}; + static const int compression_ratio = 27; // Compute base index. @@ -1897,8 +1805,7 @@ template <> struct cache_accessor { int offset = k - kb; // Get base cache. - uint128_wrapper base_cache = - data::dragonbox_pow10_significands_128[cache_index]; + uint128_wrapper base_cache = pow10_significands[cache_index]; if (offset == 0) return base_cache; // Compute the required amount of bit-shift. @@ -1906,7 +1813,7 @@ template <> struct cache_accessor { FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected"); // Try to recover the real cache. - uint64_t pow5 = data::powers_of_5_64[offset]; + uint64_t pow5 = powers_of_5_64[offset]; uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5); uint128_wrapper middle_low = umul128(base_cache.low() - (kb < 0 ? 1u : 0u), pow5); @@ -1924,7 +1831,7 @@ template <> struct cache_accessor { // Get error. 
int error_idx = (k - float_info::min_k) / 16; - uint32_t error = (data::dragonbox_pow10_recovery_errors[error_idx] >> + uint32_t error = (pow10_recovery_errors[error_idx] >> ((k - float_info::min_k) % 16) * 2) & 0x3; @@ -2010,7 +1917,7 @@ bool is_center_integer(typename float_info::carrier_uint two_f, int exponent, } // Remove trailing zeros from n and return the number of zeros removed (float) -FMT_ALWAYS_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT { +FMT_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT { #ifdef FMT_BUILTIN_CTZ int t = FMT_BUILTIN_CTZ(n); #else @@ -2038,7 +1945,7 @@ FMT_ALWAYS_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT { } // Removes trailing zeros and returns the number of zeros removed (double) -FMT_ALWAYS_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT { +FMT_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT { #ifdef FMT_BUILTIN_CTZLL int t = FMT_BUILTIN_CTZLL(n); #else @@ -2124,8 +2031,7 @@ FMT_ALWAYS_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT { // The main algorithm for shorter interval case template -FMT_ALWAYS_INLINE FMT_SAFEBUFFERS decimal_fp shorter_interval_case( - int exponent) FMT_NOEXCEPT { +FMT_INLINE decimal_fp shorter_interval_case(int exponent) FMT_NOEXCEPT { decimal_fp ret_value; // Compute k and beta const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent); @@ -2171,8 +2077,7 @@ FMT_ALWAYS_INLINE FMT_SAFEBUFFERS decimal_fp shorter_interval_case( return ret_value; } -template -FMT_SAFEBUFFERS decimal_fp to_decimal(T x) FMT_NOEXCEPT { +template decimal_fp to_decimal(T x) FMT_NOEXCEPT { // Step 1: integer promotion & Schubfach multiplier calculation. using carrier_uint = typename float_info::carrier_uint; @@ -2308,7 +2213,7 @@ small_divisor_case_label: // Formats value using a variation of the Fixed-Precision Positive // Floating-Point Printout ((FPP)^2) algorithm by Steele & White: -// https://fmt.dev/p372-steele.pdf. 
+// https://fmt.dev/papers/p372-steele.pdf. template void fallback_format(Double d, int num_digits, bool binary32, buffer& buf, int& exp10) { @@ -2571,11 +2476,11 @@ int snprintf_float(T value, int precision, float_specs specs, --exp_pos; } while (*exp_pos != 'e'); char sign = exp_pos[1]; - assert(sign == '+' || sign == '-'); + FMT_ASSERT(sign == '+' || sign == '-', ""); int exp = 0; auto p = exp_pos + 2; // Skip 'e' and sign. do { - assert(is_digit(*p)); + FMT_ASSERT(is_digit(*p), ""); exp = exp * 10 + (*p++ - '0'); } while (p != end); if (sign == '-') exp = -exp; @@ -2592,71 +2497,11 @@ int snprintf_float(T value, int precision, float_specs specs, return exp - fraction_size; } } - -// A public domain branchless UTF-8 decoder by Christopher Wellons: -// https://github.com/skeeto/branchless-utf8 -/* Decode the next character, c, from buf, reporting errors in e. - * - * Since this is a branchless decoder, four bytes will be read from the - * buffer regardless of the actual length of the next character. This - * means the buffer _must_ have at least three bytes of zero padding - * following the end of the data stream. - * - * Errors are reported in e, which will be non-zero if the parsed - * character was somehow invalid: invalid byte sequence, non-canonical - * encoding, or a surrogate half. - * - * The function returns a pointer to the next character. When an error - * occurs, this pointer will be a guess that depends on the particular - * error, but it will always advance at least one byte. - */ -inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) { - static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; - static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; - static const int shiftc[] = {0, 18, 12, 6, 0}; - static const int shifte[] = {0, 6, 4, 2, 0}; - - int len = code_point_length(buf); - const char* next = buf + len; - - // Assume a four-byte character and load four bytes. Unused bits are - // shifted out. 
- auto s = reinterpret_cast(buf); - *c = uint32_t(s[0] & masks[len]) << 18; - *c |= uint32_t(s[1] & 0x3f) << 12; - *c |= uint32_t(s[2] & 0x3f) << 6; - *c |= uint32_t(s[3] & 0x3f) << 0; - *c >>= shiftc[len]; - - // Accumulate the various error conditions. - *e = (*c < mins[len]) << 6; // non-canonical encoding - *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half? - *e |= (*c > 0x10FFFF) << 8; // out of range? - *e |= (s[1] & 0xc0) >> 2; - *e |= (s[2] & 0xc0) >> 4; - *e |= (s[3]) >> 6; - *e ^= 0x2a; // top two bits of each tail byte correct? - *e >>= shifte[len]; - - return next; -} - -struct stringifier { - template FMT_INLINE std::string operator()(T value) const { - return to_string(value); - } - std::string operator()(basic_format_arg::handle h) const { - memory_buffer buf; - format_parse_context parse_ctx({}); - format_context format_ctx(buffer_appender(buf), {}, {}); - h.format(parse_ctx, format_ctx); - return to_string(buf); - } -}; } // namespace detail template <> struct formatter { - format_parse_context::iterator parse(format_parse_context& ctx) { + FMT_CONSTEXPR format_parse_context::iterator parse( + format_parse_context& ctx) { return ctx.begin(); } @@ -2667,23 +2512,21 @@ template <> struct formatter { for (auto i = n.bigits_.size(); i > 0; --i) { auto value = n.bigits_[i - 1u]; if (first) { - out = format_to(out, "{:x}", value); + out = format_to(out, FMT_STRING("{:x}"), value); first = false; continue; } - out = format_to(out, "{:08x}", value); + out = format_to(out, FMT_STRING("{:08x}"), value); } if (n.exp_ > 0) - out = format_to(out, "p{}", n.exp_ * detail::bigint::bigit_bits); + out = format_to(out, FMT_STRING("p{}"), + n.exp_ * detail::bigint::bigit_bits); return out; } }; FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) { - auto transcode = [this](const char* p) { - auto cp = uint32_t(); - auto error = 0; - p = utf8_decode(p, &cp, &error); + for_each_codepoint(s, [this](uint32_t cp, int error) { if (error != 0) 
FMT_THROW(std::runtime_error("invalid utf8")); if (cp <= 0xFFFF) { buffer_.push_back(static_cast(cp)); @@ -2692,42 +2535,16 @@ FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) { buffer_.push_back(static_cast(0xD800 + (cp >> 10))); buffer_.push_back(static_cast(0xDC00 + (cp & 0x3FF))); } - return p; - }; - auto p = s.data(); - const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. - if (s.size() >= block_size) { - for (auto end = p + s.size() - block_size + 1; p < end;) p = transcode(p); - } - if (auto num_chars_left = s.data() + s.size() - p) { - char buf[2 * block_size - 1] = {}; - memcpy(buf, p, to_unsigned(num_chars_left)); - p = buf; - do { - p = transcode(p); - } while (p - buf < num_chars_left); - } + }); buffer_.push_back(0); } FMT_FUNC void format_system_error(detail::buffer& out, int error_code, - string_view message) FMT_NOEXCEPT { + const char* message) FMT_NOEXCEPT { FMT_TRY { - memory_buffer buf; - buf.resize(inline_buffer_size); - for (;;) { - char* system_message = &buf[0]; - int result = - detail::safe_strerror(error_code, system_message, buf.size()); - if (result == 0) { - format_to(detail::buffer_appender(out), "{}: {}", message, - system_message); - return; - } - if (result != ERANGE) - break; // Can't get error message, report error code instead. - buf.resize(buf.size() * 2); - } + auto ec = std::error_code(error_code, std::generic_category()); + write(std::back_inserter(out), std::system_error(ec, message).what()); + return; } FMT_CATCH(...) 
{} format_error_code(out, error_code, message); @@ -2738,18 +2555,15 @@ FMT_FUNC void detail::error_handler::on_error(const char* message) { } FMT_FUNC void report_system_error(int error_code, - fmt::string_view message) FMT_NOEXCEPT { + const char* message) FMT_NOEXCEPT { report_error(format_system_error, error_code, message); } -FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) { - if (format_str.size() == 2 && equal2(format_str.data(), "{}")) { - auto arg = args.get(0); - if (!arg) error_handler().on_error("argument not found"); - return visit_format_arg(stringifier(), arg); - } - memory_buffer buffer; - detail::vformat_to(buffer, format_str, args); +FMT_FUNC std::string vformat(string_view fmt, format_args args) { + // Don't optimize the "{}" case to keep the binary size small and because it + // can be better optimized in fmt::format anyway. + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); return to_string(buffer); } @@ -2761,24 +2575,30 @@ extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // } // namespace detail #endif -FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { - memory_buffer buffer; - detail::vformat_to(buffer, format_str, - basic_format_args>(args)); +namespace detail { +FMT_FUNC void print(std::FILE* f, string_view text) { #ifdef _WIN32 auto fd = _fileno(f); if (_isatty(fd)) { - detail::utf8_to_utf16 u16(string_view(buffer.data(), buffer.size())); + detail::utf8_to_utf16 u16(string_view(text.data(), text.size())); auto written = detail::dword(); - if (!detail::WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), - u16.c_str(), static_cast(u16.size()), - &written, nullptr)) { - FMT_THROW(format_error("failed to write to console")); + if (detail::WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), + u16.c_str(), static_cast(u16.size()), + &written, nullptr)) { + return; } - return; + // Fallback to fwrite on failure. 
It can happen if the output has been + // redirected to NUL. } #endif - detail::fwrite_fully(buffer.data(), 1, buffer.size(), f); + detail::fwrite_fully(text.data(), 1, text.size(), f); +} +} // namespace detail + +FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { + memory_buffer buffer; + detail::vformat_to(buffer, format_str, args); + detail::print(f, {buffer.data(), buffer.size()}); } #ifdef _WIN32 diff --git a/src/fmt/format.h b/src/fmt/format.h index 1a037b02b7..03ae1c961a 100644 --- a/src/fmt/format.h +++ b/src/fmt/format.h @@ -33,13 +33,13 @@ #ifndef FMT_FORMAT_H_ #define FMT_FORMAT_H_ -#include -#include -#include -#include -#include -#include -#include +#include // std::signbit +#include // uint32_t +#include // std::numeric_limits +#include // std::uninitialized_copy +#include // std::runtime_error +#include // std::system_error +#include // std::swap #include "core.h" @@ -69,30 +69,10 @@ # define FMT_NOINLINE #endif -#if __cplusplus == 201103L || __cplusplus == 201402L -# if defined(__INTEL_COMPILER) || defined(__PGI) -# define FMT_FALLTHROUGH -# elif defined(__clang__) -# define FMT_FALLTHROUGH [[clang::fallthrough]] -# elif FMT_GCC_VERSION >= 700 && \ - (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) -# define FMT_FALLTHROUGH [[gnu::fallthrough]] -# else -# define FMT_FALLTHROUGH -# endif -#elif FMT_HAS_CPP17_ATTRIBUTE(fallthrough) || \ - (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) -# define FMT_FALLTHROUGH [[fallthrough]] +#if FMT_MSC_VER +# define FMT_MSC_DEFAULT = default #else -# define FMT_FALLTHROUGH -#endif - -#ifndef FMT_MAYBE_UNUSED -# if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) -# define FMT_MAYBE_UNUSED [[maybe_unused]] -# else -# define FMT_MAYBE_UNUSED -# endif +# define FMT_MSC_DEFAULT #endif #ifndef FMT_THROW @@ -113,10 +93,9 @@ FMT_END_NAMESPACE # define FMT_THROW(x) throw x # endif # else -# define FMT_THROW(x) \ - do { \ - static_cast(sizeof(x)); \ - FMT_ASSERT(false, ""); \ +# define FMT_THROW(x) \ 
+ do { \ + FMT_ASSERT(false, (x).what()); \ } while (false) # endif #endif @@ -129,6 +108,27 @@ FMT_END_NAMESPACE # define FMT_CATCH(x) if (false) #endif +#ifndef FMT_DEPRECATED +# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900 +# define FMT_DEPRECATED [[deprecated]] +# else +# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) +# define FMT_DEPRECATED __attribute__((deprecated)) +# elif FMT_MSC_VER +# define FMT_DEPRECATED __declspec(deprecated) +# else +# define FMT_DEPRECATED /* deprecated */ +# endif +# endif +#endif + +// Workaround broken [[deprecated]] in the Intel, PGI and NVCC compilers. +#if FMT_ICC_VERSION || defined(__PGI) || FMT_NVCC +# define FMT_DEPRECATED_ALIAS +#else +# define FMT_DEPRECATED_ALIAS FMT_DEPRECATED +#endif + #ifndef FMT_USE_USER_DEFINED_LITERALS // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. # if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ @@ -140,36 +140,10 @@ FMT_END_NAMESPACE # endif #endif -#ifndef FMT_USE_UDL_TEMPLATE -// EDG frontend based compilers (icc, nvcc, PGI, etc) and GCC < 6.4 do not -// properly support UDL templates and GCC >= 9 warns about them. -# if FMT_USE_USER_DEFINED_LITERALS && \ - (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 501) && \ - ((FMT_GCC_VERSION >= 604 && __cplusplus >= 201402L) || \ - FMT_CLANG_VERSION >= 304) && \ - !defined(__PGI) && !defined(__NVCC__) -# define FMT_USE_UDL_TEMPLATE 1 -# else -# define FMT_USE_UDL_TEMPLATE 0 -# endif -#endif - -#ifndef FMT_USE_FLOAT -# define FMT_USE_FLOAT 1 -#endif - -#ifndef FMT_USE_DOUBLE -# define FMT_USE_DOUBLE 1 -#endif - -#ifndef FMT_USE_LONG_DOUBLE -# define FMT_USE_LONG_DOUBLE 1 -#endif - // Defining FMT_REDUCE_INT_INSTANTIATIONS to 1, will reduce the number of -// int_writer template instances to just one by only using the largest integer -// type. This results in a reduction in binary size but will cause a decrease in -// integer formatting performance. 
+// integer formatter template instantiations to just one by only using the +// largest integer type. This results in a reduction in binary size but will +// cause a decrease in integer formatting performance. #if !defined(FMT_REDUCE_INT_INSTANTIATIONS) # define FMT_REDUCE_INT_INSTANTIATIONS 0 #endif @@ -196,33 +170,33 @@ FMT_END_NAMESPACE // Some compilers masquerade as both MSVC and GCC-likes or otherwise support // __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the // MSVC intrinsics if the clz and clzll builtins are not available. -#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && \ - !defined(FMT_BUILTIN_CTZLL) && !defined(_MANAGED) +#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(FMT_BUILTIN_CTZLL) FMT_BEGIN_NAMESPACE namespace detail { // Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. -# ifndef __clang__ +# if !defined(__clang__) +# pragma managed(push, off) # pragma intrinsic(_BitScanForward) # pragma intrinsic(_BitScanReverse) -# endif -# if defined(_WIN64) && !defined(__clang__) -# pragma intrinsic(_BitScanForward64) -# pragma intrinsic(_BitScanReverse64) +# if defined(_WIN64) +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse64) +# endif # endif -inline int clz(uint32_t x) { +inline auto clz(uint32_t x) -> int { unsigned long r = 0; _BitScanReverse(&r, x); FMT_ASSERT(x != 0, ""); // Static analysis complains about using uninitialized data // "r", but the only way that can happen is if "x" is 0, // which the callers guarantee to not happen. 
- FMT_SUPPRESS_MSC_WARNING(6102) + FMT_MSC_WARNING(suppress : 6102) return 31 ^ static_cast(r); } # define FMT_BUILTIN_CLZ(n) detail::clz(n) -inline int clzll(uint64_t x) { +inline auto clzll(uint64_t x) -> int { unsigned long r = 0; # ifdef _WIN64 _BitScanReverse64(&r, x); @@ -233,24 +207,24 @@ inline int clzll(uint64_t x) { _BitScanReverse(&r, static_cast(x)); # endif FMT_ASSERT(x != 0, ""); - FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. return 63 ^ static_cast(r); } # define FMT_BUILTIN_CLZLL(n) detail::clzll(n) -inline int ctz(uint32_t x) { +inline auto ctz(uint32_t x) -> int { unsigned long r = 0; _BitScanForward(&r, x); FMT_ASSERT(x != 0, ""); - FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. return static_cast(r); } # define FMT_BUILTIN_CTZ(n) detail::ctz(n) -inline int ctzll(uint64_t x) { +inline auto ctzll(uint64_t x) -> int { unsigned long r = 0; FMT_ASSERT(x != 0, ""); - FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning. + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. # ifdef _WIN64 _BitScanForward64(&r, x); # else @@ -263,30 +237,35 @@ inline int ctzll(uint64_t x) { return static_cast(r); } # define FMT_BUILTIN_CTZLL(n) detail::ctzll(n) +# if !defined(__clang__) +# pragma managed(pop) +# endif } // namespace detail FMT_END_NAMESPACE #endif -// Enable the deprecated numeric alignment. -#ifndef FMT_DEPRECATED_NUMERIC_ALIGN -# define FMT_DEPRECATED_NUMERIC_ALIGN 0 -#endif - FMT_BEGIN_NAMESPACE namespace detail { +#if __cplusplus >= 202002L || \ + (__cplusplus >= 201709L && FMT_GCC_VERSION >= 1002) +# define FMT_CONSTEXPR20 constexpr +#else +# define FMT_CONSTEXPR20 +#endif + // An equivalent of `*reinterpret_cast(&source)` that doesn't have // undefined behavior (e.g. 
due to type aliasing). // Example: uint64_t d = bit_cast(2.718); template -inline Dest bit_cast(const Source& source) { +inline auto bit_cast(const Source& source) -> Dest { static_assert(sizeof(Dest) == sizeof(Source), "size mismatch"); Dest dest; std::memcpy(&dest, &source, sizeof(dest)); return dest; } -inline bool is_big_endian() { +inline auto is_big_endian() -> bool { const auto u = 1u; struct bytes { char data[sizeof(u)]; @@ -309,26 +288,28 @@ struct fallback_uintptr { }; #ifdef UINTPTR_MAX using uintptr_t = ::uintptr_t; -inline uintptr_t to_uintptr(const void* p) { return bit_cast(p); } +inline auto to_uintptr(const void* p) -> uintptr_t { + return bit_cast(p); +} #else using uintptr_t = fallback_uintptr; -inline fallback_uintptr to_uintptr(const void* p) { +inline auto to_uintptr(const void* p) -> fallback_uintptr { return fallback_uintptr(p); } #endif // Returns the largest possible value for type T. Same as // std::numeric_limits::max() but shorter and not affected by the max macro. -template constexpr T max_value() { +template constexpr auto max_value() -> T { return (std::numeric_limits::max)(); } -template constexpr int num_bits() { +template constexpr auto num_bits() -> int { return std::numeric_limits::digits; } // std::numeric_limits::digits may return 0 for 128-bit ints. -template <> constexpr int num_bits() { return 128; } -template <> constexpr int num_bits() { return 128; } -template <> constexpr int num_bits() { +template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return static_cast(sizeof(void*) * std::numeric_limits::digits); } @@ -346,31 +327,35 @@ using iterator_t = decltype(std::begin(std::declval())); template using sentinel_t = decltype(std::end(std::declval())); // A workaround for std::string not having mutable data() until C++17. 
-template inline Char* get_data(std::basic_string& s) { +template +inline auto get_data(std::basic_string& s) -> Char* { return &s[0]; } template -inline typename Container::value_type* get_data(Container& c) { +inline auto get_data(Container& c) -> typename Container::value_type* { return c.data(); } #if defined(_SECURE_SCL) && _SECURE_SCL // Make a checked iterator to avoid MSVC warnings. template using checked_ptr = stdext::checked_array_iterator; -template checked_ptr make_checked(T* p, size_t size) { +template auto make_checked(T* p, size_t size) -> checked_ptr { return {p, size}; } #else template using checked_ptr = T*; -template inline T* make_checked(T* p, size_t) { return p; } +template inline auto make_checked(T* p, size_t) -> T* { return p; } #endif +// Attempts to reserve space for n extra characters in the output range. +// Returns a pointer to the reserved range or a reference to it. template ::value)> -#if FMT_CLANG_VERSION +#if FMT_CLANG_VERSION >= 307 && !FMT_ICC_VERSION __attribute__((no_sanitize("undefined"))) #endif -inline checked_ptr -reserve(std::back_insert_iterator it, size_t n) { +inline auto +reserve(std::back_insert_iterator it, size_t n) + -> checked_ptr { Container& c = get_container(it); size_t size = c.size(); c.resize(size + n); @@ -378,21 +363,26 @@ reserve(std::back_insert_iterator it, size_t n) { } template -inline buffer_appender reserve(buffer_appender it, size_t n) { +inline auto reserve(buffer_appender it, size_t n) -> buffer_appender { buffer& buf = get_container(it); buf.try_reserve(buf.size() + n); return it; } -template inline Iterator& reserve(Iterator& it, size_t) { +template +constexpr auto reserve(Iterator& it, size_t) -> Iterator& { return it; } +template +using reserve_iterator = + remove_reference_t(), 0))>; + template -constexpr T* to_pointer(OutputIt, size_t) { +constexpr auto to_pointer(OutputIt, size_t) -> T* { return nullptr; } -template T* to_pointer(buffer_appender it, size_t n) { +template auto 
to_pointer(buffer_appender it, size_t n) -> T* { buffer& buf = get_container(it); auto size = buf.size(); if (buf.capacity() < size + n) return nullptr; @@ -401,192 +391,179 @@ template T* to_pointer(buffer_appender it, size_t n) { } template ::value)> -inline std::back_insert_iterator base_iterator( - std::back_insert_iterator& it, - checked_ptr) { +inline auto base_iterator(std::back_insert_iterator& it, + checked_ptr) + -> std::back_insert_iterator { return it; } template -inline Iterator base_iterator(Iterator, Iterator it) { +constexpr auto base_iterator(Iterator, Iterator it) -> Iterator { return it; } -// An output iterator that counts the number of objects written to it and -// discards them. -class counting_iterator { - private: - size_t count_; +// is spectacularly slow to compile in C++20 so use a simple fill_n +// instead (#1998). +template +FMT_CONSTEXPR auto fill_n(OutputIt out, Size count, const T& value) + -> OutputIt { + for (Size i = 0; i < count; ++i) *out++ = value; + return out; +} +template +FMT_CONSTEXPR20 auto fill_n(T* out, Size count, char value) -> T* { + if (is_constant_evaluated()) { + return fill_n(out, count, value); + } + std::memset(out, value, to_unsigned(count)); + return out + count; +} - public: - using iterator_category = std::output_iterator_tag; - using difference_type = std::ptrdiff_t; - using pointer = void; - using reference = void; - using _Unchecked_type = counting_iterator; // Mark iterator as checked. +#ifdef __cpp_char8_t +using char8_type = char8_t; +#else +enum char8_type : unsigned char {}; +#endif - struct value_type { - template void operator=(const T&) {} +template +FMT_CONSTEXPR FMT_NOINLINE auto copy_str_noinline(InputIt begin, InputIt end, + OutputIt out) -> OutputIt { + return copy_str(begin, end, out); +} + +// A public domain branchless UTF-8 decoder by Christopher Wellons: +// https://github.com/skeeto/branchless-utf8 +/* Decode the next character, c, from s, reporting errors in e. 
+ * + * Since this is a branchless decoder, four bytes will be read from the + * buffer regardless of the actual length of the next character. This + * means the buffer _must_ have at least three bytes of zero padding + * following the end of the data stream. + * + * Errors are reported in e, which will be non-zero if the parsed + * character was somehow invalid: invalid byte sequence, non-canonical + * encoding, or a surrogate half. + * + * The function returns a pointer to the next character. When an error + * occurs, this pointer will be a guess that depends on the particular + * error, but it will always advance at least one byte. + */ +FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) + -> const char* { + constexpr const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; + constexpr const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; + constexpr const int shiftc[] = {0, 18, 12, 6, 0}; + constexpr const int shifte[] = {0, 6, 4, 2, 0}; + + int len = code_point_length(s); + const char* next = s + len; + + // Assume a four-byte character and load four bytes. Unused bits are + // shifted out. + *c = uint32_t(s[0] & masks[len]) << 18; + *c |= uint32_t(s[1] & 0x3f) << 12; + *c |= uint32_t(s[2] & 0x3f) << 6; + *c |= uint32_t(s[3] & 0x3f) << 0; + *c >>= shiftc[len]; + + // Accumulate the various error conditions. + using uchar = unsigned char; + *e = (*c < mins[len]) << 6; // non-canonical encoding + *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half? + *e |= (*c > 0x10FFFF) << 8; // out of range? + *e |= (uchar(s[1]) & 0xc0) >> 2; + *e |= (uchar(s[2]) & 0xc0) >> 4; + *e |= uchar(s[3]) >> 6; + *e ^= 0x2a; // top two bits of each tail byte correct? 
+ *e >>= shifte[len]; + + return next; +} + +template +FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) { + auto decode = [f](const char* p) { + auto cp = uint32_t(); + auto error = 0; + p = utf8_decode(p, &cp, &error); + f(cp, error); + return p; }; - - counting_iterator() : count_(0) {} - - size_t count() const { return count_; } - - counting_iterator& operator++() { - ++count_; - return *this; + auto p = s.data(); + const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. + if (s.size() >= block_size) { + for (auto end = p + s.size() - block_size + 1; p < end;) p = decode(p); } - counting_iterator operator++(int) { - auto it = *this; - ++*this; - return it; + if (auto num_chars_left = s.data() + s.size() - p) { + char buf[2 * block_size - 1] = {}; + copy_str(p, p + num_chars_left, buf); + p = buf; + do { + p = decode(p); + } while (p - buf < num_chars_left); } - - friend counting_iterator operator+(counting_iterator it, difference_type n) { - it.count_ += static_cast(n); - return it; - } - - value_type operator*() const { return {}; } -}; - -template class truncating_iterator_base { - protected: - OutputIt out_; - size_t limit_; - size_t count_; - - truncating_iterator_base(OutputIt out, size_t limit) - : out_(out), limit_(limit), count_(0) {} - - public: - using iterator_category = std::output_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = void; - using pointer = void; - using reference = void; - using _Unchecked_type = - truncating_iterator_base; // Mark iterator as checked. - - OutputIt base() const { return out_; } - size_t count() const { return count_; } -}; - -// An output iterator that truncates the output and counts the number of objects -// written to it. 
-template ::value_type>::type> -class truncating_iterator; - -template -class truncating_iterator - : public truncating_iterator_base { - mutable typename truncating_iterator_base::value_type blackhole_; - - public: - using value_type = typename truncating_iterator_base::value_type; - - truncating_iterator(OutputIt out, size_t limit) - : truncating_iterator_base(out, limit) {} - - truncating_iterator& operator++() { - if (this->count_++ < this->limit_) ++this->out_; - return *this; - } - - truncating_iterator operator++(int) { - auto it = *this; - ++*this; - return it; - } - - value_type& operator*() const { - return this->count_ < this->limit_ ? *this->out_ : blackhole_; - } -}; - -template -class truncating_iterator - : public truncating_iterator_base { - public: - truncating_iterator(OutputIt out, size_t limit) - : truncating_iterator_base(out, limit) {} - - template truncating_iterator& operator=(T val) { - if (this->count_++ < this->limit_) *this->out_++ = val; - return *this; - } - - truncating_iterator& operator++() { return *this; } - truncating_iterator& operator++(int) { return *this; } - truncating_iterator& operator*() { return *this; } -}; +} template -inline size_t count_code_points(basic_string_view s) { +inline auto compute_width(basic_string_view s) -> size_t { return s.size(); } -// Counts the number of code points in a UTF-8 string. -inline size_t count_code_points(basic_string_view s) { - const char* data = s.data(); +// Computes approximate display width of a UTF-8 string. +FMT_CONSTEXPR inline size_t compute_width(string_view s) { size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80) ++num_code_points; - } + // It is not a lambda for compatibility with C++14. + struct count_code_points { + size_t* count; + FMT_CONSTEXPR void operator()(uint32_t cp, int error) const { + *count += detail::to_unsigned( + 1 + + (error == 0 && cp >= 0x1100 && + (cp <= 0x115f || // Hangul Jamo init. 
consonants + cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET〈 + cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET 〉 + // CJK ... Yi except Unicode Character “〿”: + (cp >= 0x2e80 && cp <= 0xa4cf && cp != 0x303f) || + (cp >= 0xac00 && cp <= 0xd7a3) || // Hangul Syllables + (cp >= 0xf900 && cp <= 0xfaff) || // CJK Compatibility Ideographs + (cp >= 0xfe10 && cp <= 0xfe19) || // Vertical Forms + (cp >= 0xfe30 && cp <= 0xfe6f) || // CJK Compatibility Forms + (cp >= 0xff00 && cp <= 0xff60) || // Fullwidth Forms + (cp >= 0xffe0 && cp <= 0xffe6) || // Fullwidth Forms + (cp >= 0x20000 && cp <= 0x2fffd) || // CJK + (cp >= 0x30000 && cp <= 0x3fffd) || + // Miscellaneous Symbols and Pictographs + Emoticons: + (cp >= 0x1f300 && cp <= 0x1f64f) || + // Supplemental Symbols and Pictographs: + (cp >= 0x1f900 && cp <= 0x1f9ff)))); + } + }; + for_each_codepoint(s, count_code_points{&num_code_points}); return num_code_points; } -inline size_t count_code_points(basic_string_view s) { - return count_code_points(basic_string_view( +inline auto compute_width(basic_string_view s) -> size_t { + return compute_width(basic_string_view( reinterpret_cast(s.data()), s.size())); } template -inline size_t code_point_index(basic_string_view s, size_t n) { +inline auto code_point_index(basic_string_view s, size_t n) -> size_t { size_t size = s.size(); return n < size ? n : size; } // Calculates the index of the nth code point in a UTF-8 string. 
-inline size_t code_point_index(basic_string_view s, size_t n) { +inline auto code_point_index(basic_string_view s, size_t n) + -> size_t { const char8_type* data = s.data(); size_t num_code_points = 0; for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) { - return i; - } + if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; } return s.size(); } -template -using needs_conversion = bool_constant< - std::is_same::value_type, - char>::value && - std::is_same::value>; - -template ::value)> -OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { - return std::copy(begin, end, it); -} - -template ::value)> -OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { - return std::transform(begin, end, it, - [](char c) { return static_cast(c); }); -} - -template -inline counting_iterator copy_str(InputIt begin, InputIt end, - counting_iterator it) { - return it + (end - begin); -} - template using is_fast_float = bool_constant::is_iec559 && sizeof(T) <= sizeof(double)>; @@ -598,7 +575,7 @@ using is_fast_float = bool_constant::is_iec559 && template template void buffer::append(const U* begin, const U* end) { - do { + while (begin != end) { auto count = to_unsigned(end - begin); try_reserve(size_ + count); auto free_cap = capacity_ - size_; @@ -606,16 +583,17 @@ void buffer::append(const U* begin, const U* end) { std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count)); size_ += count; begin += count; - } while (begin != end); + } } -template -void iterator_buffer::flush() { - out_ = std::copy_n(data_, this->limit(this->size()), out_); - this->clear(); -} +template +struct is_locale : std::false_type {}; +template +struct is_locale> : std::true_type {}; } // namespace detail +FMT_MODULE_EXPORT_BEGIN + // The number of characters to store in the basic_memory_buffer object itself // to avoid dynamic memory allocation. 
enum { inline_buffer_size = 500 }; @@ -625,15 +603,7 @@ enum { inline_buffer_size = 500 }; A dynamically growing memory buffer for trivially copyable/constructible types with the first ``SIZE`` elements stored in the object itself. - You can use one of the following type aliases for common character types: - - +----------------+------------------------------+ - | Type | Definition | - +================+==============================+ - | memory_buffer | basic_memory_buffer | - +----------------+------------------------------+ - | wmemory_buffer | basic_memory_buffer | - +----------------+------------------------------+ + You can use the ```memory_buffer`` type alias for ``char`` instead. **Example**:: @@ -710,7 +680,8 @@ class basic_memory_buffer final : public detail::buffer { Moves the content of the other ``basic_memory_buffer`` object to this one. \endrst */ - basic_memory_buffer& operator=(basic_memory_buffer&& other) FMT_NOEXCEPT { + auto operator=(basic_memory_buffer&& other) FMT_NOEXCEPT + -> basic_memory_buffer& { FMT_ASSERT(this != &other, ""); deallocate(); move(other); @@ -718,7 +689,7 @@ class basic_memory_buffer final : public detail::buffer { } // Returns a copy of the allocator associated with this buffer. - Allocator get_allocator() const { return alloc_; } + auto get_allocator() const -> Allocator { return alloc_; } /** Resizes the buffer to contain *count* elements. If T is a POD type new @@ -742,9 +713,13 @@ void basic_memory_buffer::grow(size_t size) { #ifdef FMT_FUZZ if (size > 5000) throw std::runtime_error("fuzz mode - won't grow that much"); #endif + const size_t max_size = std::allocator_traits::max_size(alloc_); size_t old_capacity = this->capacity(); size_t new_capacity = old_capacity + old_capacity / 2; - if (size > new_capacity) new_capacity = size; + if (size > new_capacity) + new_capacity = size; + else if (new_capacity > max_size) + new_capacity = size > max_size ? 
size : max_size; T* old_data = this->data(); T* new_data = std::allocator_traits::allocate(alloc_, new_capacity); @@ -759,12 +734,15 @@ void basic_memory_buffer::grow(size_t size) { } using memory_buffer = basic_memory_buffer; -using wmemory_buffer = basic_memory_buffer; template struct is_contiguous> : std::true_type { }; +namespace detail { +FMT_API void print(std::FILE*, string_view); +} + /** A formatting error such as invalid format string. */ FMT_CLASS_API class FMT_API format_error : public std::runtime_error { @@ -776,10 +754,66 @@ class FMT_API format_error : public std::runtime_error { format_error& operator=(const format_error&) = default; format_error(format_error&&) = default; format_error& operator=(format_error&&) = default; - ~format_error() FMT_NOEXCEPT FMT_OVERRIDE; + ~format_error() FMT_NOEXCEPT FMT_OVERRIDE FMT_MSC_DEFAULT; }; -namespace detail { +/** + \rst + Constructs a `~fmt::format_arg_store` object that contains references + to arguments and can be implicitly converted to `~fmt::format_args`. + If ``fmt`` is a compile-time string then `make_args_checked` checks + its validity at compile time. + \endrst + */ +template > +FMT_INLINE auto make_args_checked(const S& fmt, + const remove_reference_t&... args) + -> format_arg_store, remove_reference_t...> { + static_assert( + detail::count<( + std::is_base_of>::value && + std::is_reference::value)...>() == 0, + "passing views as lvalues is disallowed"); + detail::check_format_string(fmt); + return {args...}; +} + +// compile-time support +namespace detail_exported { +#if FMT_USE_NONTYPE_TEMPLATE_PARAMETERS +template struct fixed_string { + constexpr fixed_string(const Char (&str)[N]) { + detail::copy_str(static_cast(str), + str + N, data); + } + Char data[N]{}; +}; +#endif + +// Converts a compile-time string to basic_string_view. +template +constexpr auto compile_string_to_view(const Char (&s)[N]) + -> basic_string_view { + // Remove trailing NUL character if needed. 
Won't be present if this is used + // with a raw character array (i.e. not defined as a string). + return {s, N - (std::char_traits::to_int_type(s[N - 1]) == 0 ? 1 : 0)}; +} +template +constexpr auto compile_string_to_view(detail::std_string_view s) + -> basic_string_view { + return {s.data(), s.size()}; +} +} // namespace detail_exported + +FMT_BEGIN_DETAIL_NAMESPACE + +inline void throw_format_error(const char* message) { + FMT_THROW(format_error(message)); +} + +template struct is_integral : std::is_integral {}; +template <> struct is_integral : std::true_type {}; +template <> struct is_integral : std::true_type {}; template using is_signed = @@ -789,16 +823,16 @@ using is_signed = // Returns true if value is negative, false otherwise. // Same as `value < 0` but doesn't produce warnings if T is an unsigned type. template ::value)> -FMT_CONSTEXPR bool is_negative(T value) { +FMT_CONSTEXPR auto is_negative(T value) -> bool { return value < 0; } template ::value)> -FMT_CONSTEXPR bool is_negative(T) { +FMT_CONSTEXPR auto is_negative(T) -> bool { return false; } template ::value)> -FMT_CONSTEXPR bool is_supported_floating_point(T) { +FMT_CONSTEXPR auto is_supported_floating_point(T) -> uint16_t { return (std::is_same::value && FMT_USE_FLOAT) || (std::is_same::value && FMT_USE_DOUBLE) || (std::is_same::value && FMT_USE_LONG_DOUBLE); @@ -811,121 +845,56 @@ using uint32_or_64_or_128_t = conditional_t() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS, uint32_t, conditional_t() <= 64, uint64_t, uint128_t>>; +template +using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; -// 128-bit integer type used internally -struct FMT_EXTERN_TEMPLATE_API uint128_wrapper { - uint128_wrapper() = default; - -#if FMT_USE_INT128 - uint128_t internal_; - - uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT - : internal_{static_cast(low) | - (static_cast(high) << 64)} {} - - uint128_wrapper(uint128_t u) : internal_{u} {} - - uint64_t high() const FMT_NOEXCEPT { return 
uint64_t(internal_ >> 64); } - uint64_t low() const FMT_NOEXCEPT { return uint64_t(internal_); } - - uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { - internal_ += n; - return *this; - } -#else - uint64_t high_; - uint64_t low_; - - uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT : high_{high}, - low_{low} {} - - uint64_t high() const FMT_NOEXCEPT { return high_; } - uint64_t low() const FMT_NOEXCEPT { return low_; } - - uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT { -# if defined(_MSC_VER) && defined(_M_X64) - unsigned char carry = _addcarry_u64(0, low_, n, &low_); - _addcarry_u64(carry, high_, 0, &high_); - return *this; -# else - uint64_t sum = low_ + n; - high_ += (sum < low_ ? 1 : 0); - low_ = sum; - return *this; -# endif - } -#endif -}; - -// Table entry type for divisibility test used internally -template struct FMT_EXTERN_TEMPLATE_API divtest_table_entry { - T mod_inv; - T max_quotient; -}; +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ + (factor)*1000000, (factor)*10000000, (factor)*100000000, \ + (factor)*1000000000 // Static data is placed in this class template for the header-only config. -template struct FMT_EXTERN_TEMPLATE_API basic_data { - static const uint64_t powers_of_10_64[]; - static const uint32_t zero_or_powers_of_10_32_new[]; - static const uint64_t zero_or_powers_of_10_64_new[]; - static const uint64_t grisu_pow10_significands[]; - static const int16_t grisu_pow10_exponents[]; - static const divtest_table_entry divtest_table_for_pow5_32[]; - static const divtest_table_entry divtest_table_for_pow5_64[]; - static const uint64_t dragonbox_pow10_significands_64[]; - static const uint128_wrapper dragonbox_pow10_significands_128[]; +template struct basic_data { // log10(2) = 0x0.4d104d427de7fbcc... 
static const uint64_t log10_2_significand = 0x4d104d427de7fbcc; -#if !FMT_USE_FULL_CACHE_DRAGONBOX - static const uint64_t powers_of_5_64[]; - static const uint32_t dragonbox_pow10_recovery_errors[]; -#endif - // GCC generates slightly better code for pairs than chars. - using digit_pair = char[2]; - static const digit_pair digits[]; - static const char hex_digits[]; - static const char foreground_color[]; - static const char background_color[]; - static const char reset_color[5]; - static const wchar_t wreset_color[5]; - static const char signs[]; - static const char left_padding_shifts[5]; - static const char right_padding_shifts[5]; - // DEPRECATED! These are for ABI compatibility. - static const uint32_t zero_or_powers_of_10_32[]; - static const uint64_t zero_or_powers_of_10_64[]; + // GCC generates slightly better code for pairs than chars. + FMT_API static constexpr const char digits[][2] = { + {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, + {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, + {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, + {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, + {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, + {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, + {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, + {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, + {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, + {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, + {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, + {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, + {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, + {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, + {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, 
{'8', '8'}, {'8', '9'}, + {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, + {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; + + FMT_API static constexpr const char hex_digits[] = "0123456789abcdef"; + FMT_API static constexpr const char signs[] = {0, '-', '+', ' '}; + FMT_API static constexpr const unsigned prefixes[4] = {0, 0, 0x1000000u | '+', + 0x1000000u | ' '}; + FMT_API static constexpr const char left_padding_shifts[] = {31, 31, 0, 1, 0}; + FMT_API static constexpr const char right_padding_shifts[] = {0, 31, 0, 1, 0}; }; -// Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). -// This is a function instead of an array to workaround a bug in GCC10 (#1810). -FMT_INLINE uint16_t bsr2log10(int bsr) { - static constexpr uint16_t data[] = { - 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, - 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, - 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, - 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20}; - return data[bsr]; -} - -#ifndef FMT_EXPORTED -FMT_EXTERN template struct basic_data; +#ifdef FMT_SHARED +// Required for -flto, -fivisibility=hidden and -shared to work +extern template struct basic_data; #endif // This is a struct rather than an alias to avoid shadowing warnings in gcc. struct data : basic_data<> {}; -#ifdef FMT_BUILTIN_CLZLL -// Returns the number of decimal digits in n. Leading zeros are not counted -// except for n == 0 in which case count_digits returns 1. -inline int count_digits(uint64_t n) { - // https://github.com/fmtlib/format-benchmark/blob/master/digits10 - auto t = bsr2log10(FMT_BUILTIN_CLZLL(n | 1) ^ 63); - return t - (n < data::zero_or_powers_of_10_64_new[t]); -} -#else -// Fallback version of count_digits used when __builtin_clz is not available. 
-inline int count_digits(uint64_t n) { +template FMT_CONSTEXPR auto count_digits_fallback(T n) -> int { int count = 1; for (;;) { // Integer division is slow so do it for a group of four digits instead @@ -939,27 +908,41 @@ inline int count_digits(uint64_t n) { count += 4; } } -#endif - #if FMT_USE_INT128 -inline int count_digits(uint128_t n) { - int count = 1; - for (;;) { - // Integer division is slow so do it for a group of four digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. - if (n < 10) return count; - if (n < 100) return count + 1; - if (n < 1000) return count + 2; - if (n < 10000) return count + 3; - n /= 10000U; - count += 4; - } +FMT_CONSTEXPR inline auto count_digits(uint128_t n) -> int { + return count_digits_fallback(n); } #endif +// Returns the number of decimal digits in n. Leading zeros are not counted +// except for n == 0 in which case count_digits returns 1. +FMT_CONSTEXPR20 inline auto count_digits(uint64_t n) -> int { +#ifdef FMT_BUILTIN_CLZLL + if (!is_constant_evaluated()) { + // https://github.com/fmtlib/format-benchmark/blob/master/digits10 + // Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). + constexpr uint16_t bsr2log10[] = { + 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, + 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20}; + auto t = bsr2log10[FMT_BUILTIN_CLZLL(n | 1) ^ 63]; + constexpr const uint64_t zero_or_powers_of_10[] = { + 0, 0, FMT_POWERS_OF_10(1U), FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; + return t - (n < zero_or_powers_of_10[t]); + } +#endif + return count_digits_fallback(n); +} + // Counts the number of digits in n. BITS = log2(radix). 
-template inline int count_digits(UInt n) { +template +FMT_CONSTEXPR auto count_digits(UInt n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (num_bits() == 32) + return (FMT_BUILTIN_CLZ(static_cast(n) | 1) ^ 31) / BITS + 1; +#endif int num_digits = 0; do { ++num_digits; @@ -967,66 +950,82 @@ template inline int count_digits(UInt n) { return num_digits; } -template <> int count_digits<4>(detail::fallback_uintptr n); +template <> auto count_digits<4>(detail::fallback_uintptr n) -> int; -#if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) -#elif FMT_MSC_VER -# define FMT_ALWAYS_INLINE __forceinline -#else -# define FMT_ALWAYS_INLINE inline -#endif - -// To suppress unnecessary security cookie checks -#if FMT_MSC_VER && !FMT_CLANG_VERSION -# define FMT_SAFEBUFFERS __declspec(safebuffers) -#else -# define FMT_SAFEBUFFERS -#endif - -#ifdef FMT_BUILTIN_CLZ -// Optional version of count_digits for better performance on 32-bit platforms. -inline int count_digits(uint32_t n) { - auto t = bsr2log10(FMT_BUILTIN_CLZ(n | 1) ^ 31); - return t - (n < data::zero_or_powers_of_10_32_new[t]); +// It is a separate function rather than a part of count_digits to workaround +// the lack of static constexpr in constexpr functions. +FMT_INLINE uint64_t count_digits_inc(int n) { + // An optimization by Kendall Willets from https://bit.ly/3uOIQrB. + // This increments the upper 32 bits (log10(T) - 1) when >= T is added. 
+#define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T) + static constexpr uint64_t table[] = { + FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8 + FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64 + FMT_INC(100), FMT_INC(100), FMT_INC(100), // 512 + FMT_INC(1000), FMT_INC(1000), FMT_INC(1000), // 4096 + FMT_INC(10000), FMT_INC(10000), FMT_INC(10000), // 32k + FMT_INC(100000), FMT_INC(100000), FMT_INC(100000), // 256k + FMT_INC(1000000), FMT_INC(1000000), FMT_INC(1000000), // 2048k + FMT_INC(10000000), FMT_INC(10000000), FMT_INC(10000000), // 16M + FMT_INC(100000000), FMT_INC(100000000), FMT_INC(100000000), // 128M + FMT_INC(1000000000), FMT_INC(1000000000), FMT_INC(1000000000), // 1024M + FMT_INC(1000000000), FMT_INC(1000000000) // 4B + }; + return table[n]; } -#endif -template constexpr int digits10() FMT_NOEXCEPT { +// Optional version of count_digits for better performance on 32-bit platforms. +FMT_CONSTEXPR20 inline auto count_digits(uint32_t n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated()) { + auto inc = count_digits_inc(FMT_BUILTIN_CLZ(n | 1) ^ 31); + return static_cast((n + inc) >> 32); + } +#endif + return count_digits_fallback(n); +} + +template constexpr auto digits10() FMT_NOEXCEPT -> int { return std::numeric_limits::digits10; } -template <> constexpr int digits10() FMT_NOEXCEPT { return 38; } -template <> constexpr int digits10() FMT_NOEXCEPT { return 38; } - -template FMT_API std::string grouping_impl(locale_ref loc); -template inline std::string grouping(locale_ref loc) { - return grouping_impl(loc); +template <> constexpr auto digits10() FMT_NOEXCEPT -> int { + return 38; } -template <> inline std::string grouping(locale_ref loc) { - return grouping_impl(loc); +template <> constexpr auto digits10() FMT_NOEXCEPT -> int { + return 38; } -template FMT_API Char thousands_sep_impl(locale_ref loc); -template inline Char thousands_sep(locale_ref loc) { - return Char(thousands_sep_impl(loc)); +template struct thousands_sep_result { + std::string 
grouping; + Char thousands_sep; +}; + +template +FMT_API auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result; +template +inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { + auto result = thousands_sep_impl(loc); + return {result.grouping, Char(result.thousands_sep)}; } -template <> inline wchar_t thousands_sep(locale_ref loc) { +template <> +inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { return thousands_sep_impl(loc); } -template FMT_API Char decimal_point_impl(locale_ref loc); -template inline Char decimal_point(locale_ref loc) { +template +FMT_API auto decimal_point_impl(locale_ref loc) -> Char; +template inline auto decimal_point(locale_ref loc) -> Char { return Char(decimal_point_impl(loc)); } -template <> inline wchar_t decimal_point(locale_ref loc) { +template <> inline auto decimal_point(locale_ref loc) -> wchar_t { return decimal_point_impl(loc); } // Compares two characters for equality. -template bool equal2(const Char* lhs, const char* rhs) { +template auto equal2(const Char* lhs, const char* rhs) -> bool { return lhs[0] == rhs[0] && lhs[1] == rhs[1]; } -inline bool equal2(const char* lhs, const char* rhs) { +inline auto equal2(const char* lhs, const char* rhs) -> bool { return memcmp(lhs, rhs, 2) == 0; } @@ -1046,11 +1045,19 @@ template struct format_decimal_result { // buffer of specified size. The caller must ensure that the buffer is large // enough. 
template -inline format_decimal_result format_decimal(Char* out, UInt value, - int size) { +FMT_CONSTEXPR20 auto format_decimal(Char* out, UInt value, int size) + -> format_decimal_result { FMT_ASSERT(size >= count_digits(value), "invalid digit count"); out += size; Char* end = out; + if (is_constant_evaluated()) { + while (value >= 10) { + *--out = static_cast('0' + value % 10); + value /= 10; + } + *--out = static_cast('0' + value); + return {out, end}; + } while (value >= 100) { // Integer division is slow so do it for a group of two digits instead // of for every digit. The idea comes from the talk by Alexandrescu @@ -1070,17 +1077,17 @@ inline format_decimal_result format_decimal(Char* out, UInt value, template >::value)> -inline format_decimal_result format_decimal(Iterator out, UInt value, - int size) { +inline auto format_decimal(Iterator out, UInt value, int size) + -> format_decimal_result { // Buffer is large enough to hold all digits (digits10 + 1). Char buffer[digits10() + 1]; auto end = format_decimal(buffer, value, size).end; - return {out, detail::copy_str(buffer, end, out)}; + return {out, detail::copy_str_noinline(buffer, end, out)}; } template -inline Char* format_uint(Char* buffer, UInt value, int num_digits, - bool upper = false) { +FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits, + bool upper = false) -> Char* { buffer += num_digits; Char* end = buffer; do { @@ -1093,8 +1100,8 @@ inline Char* format_uint(Char* buffer, UInt value, int num_digits, } template -Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits, - bool = false) { +auto format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits, + bool = false) -> Char* { auto char_digits = std::numeric_limits::digits / 4; int start = (num_digits + char_digits - 1) / char_digits - 1; if (int start_digits = num_digits % char_digits) { @@ -1115,7 +1122,8 @@ Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits, } template 
-inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { +inline auto format_uint(It out, UInt value, int num_digits, bool upper = false) + -> It { if (auto ptr = to_pointer(out, to_unsigned(num_digits))) { format_uint(ptr, value, num_digits, upper); return out; @@ -1123,86 +1131,22 @@ inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). char buffer[num_bits() / BASE_BITS + 1]; format_uint(buffer, value, num_digits, upper); - return detail::copy_str(buffer, buffer + num_digits, out); + return detail::copy_str_noinline(buffer, buffer + num_digits, out); } // A converter from UTF-8 to UTF-16. class utf8_to_utf16 { private: - wmemory_buffer buffer_; + basic_memory_buffer buffer_; public: FMT_API explicit utf8_to_utf16(string_view s); - operator wstring_view() const { return {&buffer_[0], size()}; } - size_t size() const { return buffer_.size() - 1; } - const wchar_t* c_str() const { return &buffer_[0]; } - std::wstring str() const { return {&buffer_[0], size()}; } + operator basic_string_view() const { return {&buffer_[0], size()}; } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const wchar_t* { return &buffer_[0]; } + auto str() const -> std::wstring { return {&buffer_[0], size()}; } }; -template struct null {}; - -// Workaround an array initialization issue in gcc 4.8. 
-template struct fill_t { - private: - enum { max_size = 4 }; - Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)}; - unsigned char size_ = 1; - - public: - FMT_CONSTEXPR void operator=(basic_string_view s) { - auto size = s.size(); - if (size > max_size) { - FMT_THROW(format_error("invalid fill")); - return; - } - for (size_t i = 0; i < size; ++i) data_[i] = s[i]; - size_ = static_cast(size); - } - - size_t size() const { return size_; } - const Char* data() const { return data_; } - - FMT_CONSTEXPR Char& operator[](size_t index) { return data_[index]; } - FMT_CONSTEXPR const Char& operator[](size_t index) const { - return data_[index]; - } -}; -} // namespace detail - -// We cannot use enum classes as bit fields because of a gcc bug -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414. -namespace align { -enum type { none, left, right, center, numeric }; -} -using align_t = align::type; - -namespace sign { -enum type { none, minus, plus, space }; -} -using sign_t = sign::type; - -// Format specifiers for built-in and string types. -template struct basic_format_specs { - int width; - int precision; - char type; - align_t align : 4; - sign_t sign : 3; - bool alt : 1; // Alternate form ('#'). - detail::fill_t fill; - - constexpr basic_format_specs() - : width(0), - precision(-1), - type(0), - align(align::none), - sign(sign::none), - alt(false) {} -}; - -using format_specs = basic_format_specs; - -namespace detail { namespace dragonbox { // Type-specific information that Dragonbox uses. 
@@ -1266,37 +1210,21 @@ template struct decimal_fp { int exponent; }; -template FMT_API decimal_fp to_decimal(T x) FMT_NOEXCEPT; +template +FMT_API auto to_decimal(T x) FMT_NOEXCEPT -> decimal_fp; } // namespace dragonbox template -constexpr typename dragonbox::float_info::carrier_uint exponent_mask() { +constexpr auto exponent_mask() -> + typename dragonbox::float_info::carrier_uint { using uint = typename dragonbox::float_info::carrier_uint; return ((uint(1) << dragonbox::float_info::exponent_bits) - 1) << dragonbox::float_info::significand_bits; } -// A floating-point presentation format. -enum class float_format : unsigned char { - general, // General: exponent notation or fixed point based on magnitude. - exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. - fixed, // Fixed point with the default precision of 6, e.g. 0.0012. - hex -}; - -struct float_specs { - int precision; - float_format format : 8; - sign_t sign : 8; - bool upper : 1; - bool locale : 1; - bool binary32 : 1; - bool use_grisu : 1; - bool showpoint : 1; -}; - // Writes the exponent exp in the form "[+-]d{2,3}" to buffer. -template It write_exponent(int exp, It it) { +template +auto write_exponent(int exp, It it) -> It { FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); if (exp < 0) { *it++ = static_cast('-'); @@ -1317,173 +1245,27 @@ template It write_exponent(int exp, It it) { } template -int format_float(T value, int precision, float_specs specs, buffer& buf); +auto format_float(T value, int precision, float_specs specs, buffer& buf) + -> int; // Formats a floating-point number with snprintf. 
template -int snprintf_float(T value, int precision, float_specs specs, - buffer& buf); +auto snprintf_float(T value, int precision, float_specs specs, + buffer& buf) -> int; -template T promote_float(T value) { return value; } -inline double promote_float(float value) { return static_cast(value); } - -template -FMT_CONSTEXPR void handle_int_type_spec(char spec, Handler&& handler) { - switch (spec) { - case 0: - case 'd': - handler.on_dec(); - break; - case 'x': - case 'X': - handler.on_hex(); - break; - case 'b': - case 'B': - handler.on_bin(); - break; - case 'o': - handler.on_oct(); - break; -#ifdef FMT_DEPRECATED_N_SPECIFIER - case 'n': -#endif - case 'L': - handler.on_num(); - break; - case 'c': - handler.on_chr(); - break; - default: - handler.on_error(); - } +template auto promote_float(T value) -> T { return value; } +inline auto promote_float(float value) -> double { + return static_cast(value); } -template -FMT_CONSTEXPR float_specs parse_float_type_spec( - const basic_format_specs& specs, ErrorHandler&& eh = {}) { - auto result = float_specs(); - result.showpoint = specs.alt; - switch (specs.type) { - case 0: - result.format = float_format::general; - result.showpoint |= specs.precision > 0; - break; - case 'G': - result.upper = true; - FMT_FALLTHROUGH; - case 'g': - result.format = float_format::general; - break; - case 'E': - result.upper = true; - FMT_FALLTHROUGH; - case 'e': - result.format = float_format::exp; - result.showpoint |= specs.precision != 0; - break; - case 'F': - result.upper = true; - FMT_FALLTHROUGH; - case 'f': - result.format = float_format::fixed; - result.showpoint |= specs.precision != 0; - break; - case 'A': - result.upper = true; - FMT_FALLTHROUGH; - case 'a': - result.format = float_format::hex; - break; -#ifdef FMT_DEPRECATED_N_SPECIFIER - case 'n': -#endif - case 'L': - result.locale = true; - break; - default: - eh.on_error("invalid type specifier"); - break; - } - return result; -} - -template -FMT_CONSTEXPR void 
handle_char_specs(const basic_format_specs* specs, - Handler&& handler) { - if (!specs) return handler.on_char(); - if (specs->type && specs->type != 'c') return handler.on_int(); - if (specs->align == align::numeric || specs->sign != sign::none || specs->alt) - handler.on_error("invalid format specifier for char"); - handler.on_char(); -} - -template -FMT_CONSTEXPR void handle_cstring_type_spec(Char spec, Handler&& handler) { - if (spec == 0 || spec == 's') - handler.on_string(); - else if (spec == 'p') - handler.on_pointer(); - else - handler.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_string_type_spec(Char spec, ErrorHandler&& eh) { - if (spec != 0 && spec != 's') eh.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_pointer_type_spec(Char spec, ErrorHandler&& eh) { - if (spec != 0 && spec != 'p') eh.on_error("invalid type specifier"); -} - -template class int_type_checker : private ErrorHandler { - public: - FMT_CONSTEXPR explicit int_type_checker(ErrorHandler eh) : ErrorHandler(eh) {} - - FMT_CONSTEXPR void on_dec() {} - FMT_CONSTEXPR void on_hex() {} - FMT_CONSTEXPR void on_bin() {} - FMT_CONSTEXPR void on_oct() {} - FMT_CONSTEXPR void on_num() {} - FMT_CONSTEXPR void on_chr() {} - - FMT_CONSTEXPR void on_error() { - ErrorHandler::on_error("invalid type specifier"); - } -}; - -template -class char_specs_checker : public ErrorHandler { - private: - char type_; - - public: - FMT_CONSTEXPR char_specs_checker(char type, ErrorHandler eh) - : ErrorHandler(eh), type_(type) {} - - FMT_CONSTEXPR void on_int() { - handle_int_type_spec(type_, int_type_checker(*this)); - } - FMT_CONSTEXPR void on_char() {} -}; - -template -class cstring_type_checker : public ErrorHandler { - public: - FMT_CONSTEXPR explicit cstring_type_checker(ErrorHandler eh) - : ErrorHandler(eh) {} - - FMT_CONSTEXPR void on_string() {} - FMT_CONSTEXPR void on_pointer() {} -}; - template -FMT_NOINLINE OutputIt fill(OutputIt it, size_t n, const 
fill_t& fill) { +FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, + const fill_t& fill) -> OutputIt { auto fill_size = fill.size(); - if (fill_size == 1) return std::fill_n(it, n, fill[0]); - for (size_t i = 0; i < n; ++i) it = std::copy_n(fill.data(), fill_size, it); + if (fill_size == 1) return detail::fill_n(it, n, fill[0]); + auto data = fill.data(); + for (size_t i = 0; i < n; ++i) + it = copy_str(data, data + fill_size, it); return it; } @@ -1492,39 +1274,72 @@ FMT_NOINLINE OutputIt fill(OutputIt it, size_t n, const fill_t& fill) { // width: output display width in (terminal) column positions. template -inline OutputIt write_padded(OutputIt out, - const basic_format_specs& specs, size_t size, - size_t width, F&& f) { +FMT_CONSTEXPR auto write_padded(OutputIt out, + const basic_format_specs& specs, + size_t size, size_t width, F&& f) -> OutputIt { static_assert(align == align::left || align == align::right, ""); unsigned spec_width = to_unsigned(specs.width); size_t padding = spec_width > width ? spec_width - width : 0; auto* shifts = align == align::left ? 
data::left_padding_shifts : data::right_padding_shifts; size_t left_padding = padding >> shifts[specs.align]; + size_t right_padding = padding - left_padding; auto it = reserve(out, size + padding * specs.fill.size()); - it = fill(it, left_padding, specs.fill); + if (left_padding != 0) it = fill(it, left_padding, specs.fill); it = f(it); - it = fill(it, padding - left_padding, specs.fill); + if (right_padding != 0) it = fill(it, right_padding, specs.fill); return base_iterator(out, it); } template -inline OutputIt write_padded(OutputIt out, - const basic_format_specs& specs, size_t size, - F&& f) { +constexpr auto write_padded(OutputIt out, const basic_format_specs& specs, + size_t size, F&& f) -> OutputIt { return write_padded(out, specs, size, size, f); } +template +FMT_CONSTEXPR auto write_bytes(OutputIt out, string_view bytes, + const basic_format_specs& specs) + -> OutputIt { + return write_padded( + out, specs, bytes.size(), [bytes](reserve_iterator it) { + const char* data = bytes.data(); + return copy_str(data, data + bytes.size(), it); + }); +} + +template +auto write_ptr(OutputIt out, UIntPtr value, + const basic_format_specs* specs) -> OutputIt { + int num_digits = count_digits<4>(value); + auto size = to_unsigned(num_digits) + size_t(2); + auto write = [=](reserve_iterator it) { + *it++ = static_cast('0'); + *it++ = static_cast('x'); + return format_uint<4, Char>(it, value, num_digits); + }; + return specs ? 
write_padded(out, *specs, size, write) + : base_iterator(out, write(reserve(out, size))); +} + template -OutputIt write_bytes(OutputIt out, string_view bytes, - const basic_format_specs& specs) { - using iterator = remove_reference_t; - return write_padded(out, specs, bytes.size(), [bytes](iterator it) { - const char* data = bytes.data(); - return copy_str(data, data + bytes.size(), it); +FMT_CONSTEXPR auto write_char(OutputIt out, Char value, + const basic_format_specs& specs) + -> OutputIt { + return write_padded(out, specs, 1, [=](reserve_iterator it) { + *it++ = value; + return it; }); } +template +FMT_CONSTEXPR auto write(OutputIt out, Char value, + const basic_format_specs& specs, + locale_ref loc = {}) -> OutputIt { + return check_char_specs(specs) + ? write_char(out, value, specs) + : write(out, static_cast(value), specs, loc); +} // Data for write_int that doesn't depend on output iterator type. It is used to // avoid template code bloat. @@ -1532,9 +1347,9 @@ template struct write_int_data { size_t size; size_t padding; - write_int_data(int num_digits, string_view prefix, - const basic_format_specs& specs) - : size(prefix.size() + to_unsigned(num_digits)), padding(0) { + FMT_CONSTEXPR write_int_data(int num_digits, unsigned prefix, + const basic_format_specs& specs) + : size((prefix >> 24) + to_unsigned(num_digits)), padding(0) { if (specs.align == align::numeric) { auto width = to_unsigned(specs.width); if (width > size) { @@ -1542,7 +1357,7 @@ template struct write_int_data { size = width; } } else if (specs.precision > num_digits) { - size = prefix.size() + to_unsigned(specs.precision); + size = (prefix >> 24) + to_unsigned(specs.precision); padding = to_unsigned(specs.precision - num_digits); } } @@ -1550,182 +1365,232 @@ template struct write_int_data { // Writes an integer in the format // -// where are written by f(it). 
-template -OutputIt write_int(OutputIt out, int num_digits, string_view prefix, - const basic_format_specs& specs, F f) { - auto data = write_int_data(num_digits, prefix, specs); - using iterator = remove_reference_t; - return write_padded(out, specs, data.size, [=](iterator it) { - if (prefix.size() != 0) - it = copy_str(prefix.begin(), prefix.end(), it); - it = std::fill_n(it, data.padding, static_cast('0')); - return f(it); - }); -} - -template -OutputIt write(OutputIt out, basic_string_view s, - const basic_format_specs& specs) { - auto data = s.data(); - auto size = s.size(); - if (specs.precision >= 0 && to_unsigned(specs.precision) < size) - size = code_point_index(s, to_unsigned(specs.precision)); - auto width = specs.width != 0 - ? count_code_points(basic_string_view(data, size)) - : 0; - using iterator = remove_reference_t; - return write_padded(out, specs, size, width, [=](iterator it) { - return copy_str(data, data + size, it); - }); -} - -// The handle_int_type_spec handler that writes an integer. -template struct int_writer { - OutputIt out; - locale_ref locale; - const basic_format_specs& specs; - UInt abs_value; - char prefix[4]; - unsigned prefix_size; - - using iterator = - remove_reference_t(), 0))>; - - string_view get_prefix() const { return string_view(prefix, prefix_size); } - - template - int_writer(OutputIt output, locale_ref loc, Int value, - const basic_format_specs& s) - : out(output), - locale(loc), - specs(s), - abs_value(static_cast(value)), - prefix_size(0) { - static_assert(std::is_same, UInt>::value, ""); - if (is_negative(value)) { - prefix[0] = '-'; - ++prefix_size; - abs_value = 0 - abs_value; - } else if (specs.sign != sign::none && specs.sign != sign::minus) { - prefix[0] = specs.sign == sign::plus ? '+' : ' '; - ++prefix_size; +// where are written by write_digits(it). +// prefix contains chars in three lower bytes and the size in the fourth byte. 
+template +FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits, + unsigned prefix, + const basic_format_specs& specs, + W write_digits) -> OutputIt { + // Slightly faster check for specs.width == 0 && specs.precision == -1. + if ((specs.width | (specs.precision + 1)) == 0) { + auto it = reserve(out, to_unsigned(num_digits) + (prefix >> 24)); + if (prefix != 0) { + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); } + return base_iterator(out, write_digits(it)); } + auto data = write_int_data(num_digits, prefix, specs); + return write_padded( + out, specs, data.size, [=](reserve_iterator it) { + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + it = detail::fill_n(it, data.padding, static_cast('0')); + return write_digits(it); + }); +} - void on_dec() { +template +auto write_int_localized(OutputIt& out, UInt value, unsigned prefix, + const basic_format_specs& specs, locale_ref loc) + -> bool { + static_assert(std::is_same, UInt>::value, ""); + const auto sep_size = 1; + auto ts = thousands_sep(loc); + if (!ts.thousands_sep) return false; + int num_digits = count_digits(value); + int size = num_digits, n = num_digits; + const std::string& groups = ts.grouping; + std::string::const_iterator group = groups.cbegin(); + while (group != groups.cend() && n > *group && *group > 0 && + *group != max_value()) { + size += sep_size; + n -= *group; + ++group; + } + if (group == groups.cend()) size += sep_size * ((n - 1) / groups.back()); + char digits[40]; + format_decimal(digits, value, num_digits); + basic_memory_buffer buffer; + if (prefix != 0) ++size; + const auto usize = to_unsigned(size); + buffer.resize(usize); + basic_string_view s(&ts.thousands_sep, sep_size); + // Index of a decimal digit with the least significant digit having index 0. 
+ int digit_index = 0; + group = groups.cbegin(); + auto p = buffer.data() + size - 1; + for (int i = num_digits - 1; i > 0; --i) { + *p-- = static_cast(digits[i]); + if (*group <= 0 || ++digit_index % *group != 0 || + *group == max_value()) + continue; + if (group + 1 != groups.cend()) { + digit_index = 0; + ++group; + } + std::uninitialized_copy(s.data(), s.data() + s.size(), + make_checked(p, s.size())); + p -= s.size(); + } + *p-- = static_cast(*digits); + if (prefix != 0) *p = static_cast(prefix); + auto data = buffer.data(); + out = write_padded( + out, specs, usize, usize, [=](reserve_iterator it) { + return copy_str(data, data + size, it); + }); + return true; +} + +FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { + prefix |= prefix != 0 ? value << 8 : value; + prefix += (1u + (value > 0xff ? 1 : 0)) << 24; +} + +template struct write_int_arg { + UInt abs_value; + unsigned prefix; +}; + +template +FMT_CONSTEXPR auto make_write_int_arg(T value, sign_t sign) + -> write_int_arg> { + auto prefix = 0u; + auto abs_value = static_cast>(value); + if (is_negative(value)) { + prefix = 0x01000000 | '-'; + abs_value = 0 - abs_value; + } else { + prefix = data::prefixes[sign]; + } + return {abs_value, prefix}; +} + +template +FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, write_int_arg arg, + const basic_format_specs& specs, + locale_ref loc) -> OutputIt { + static_assert(std::is_same>::value, ""); + auto abs_value = arg.abs_value; + auto prefix = arg.prefix; + auto utype = static_cast(specs.type); + switch (specs.type) { + case 0: + case 'd': { + if (specs.localized && + write_int_localized(out, static_cast>(abs_value), + prefix, specs, loc)) { + return out; + } auto num_digits = count_digits(abs_value); - out = write_int( - out, num_digits, get_prefix(), specs, [this, num_digits](iterator it) { + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { return format_decimal(it, abs_value, num_digits).end; }); } - 
- void on_hex() { - if (specs.alt) { - prefix[prefix_size++] = '0'; - prefix[prefix_size++] = specs.type; - } + case 'x': + case 'X': { + if (specs.alt) prefix_append(prefix, (utype << 8) | '0'); + bool upper = specs.type != 'x'; int num_digits = count_digits<4>(abs_value); - out = write_int(out, num_digits, get_prefix(), specs, - [this, num_digits](iterator it) { - return format_uint<4, Char>(it, abs_value, num_digits, - specs.type != 'x'); - }); + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { + return format_uint<4, Char>(it, abs_value, num_digits, upper); + }); } - - void on_bin() { - if (specs.alt) { - prefix[prefix_size++] = '0'; - prefix[prefix_size++] = static_cast(specs.type); - } + case 'b': + case 'B': { + if (specs.alt) prefix_append(prefix, (utype << 8) | '0'); int num_digits = count_digits<1>(abs_value); - out = write_int(out, num_digits, get_prefix(), specs, - [this, num_digits](iterator it) { - return format_uint<1, Char>(it, abs_value, num_digits); - }); + return write_int(out, num_digits, prefix, specs, + [=](reserve_iterator it) { + return format_uint<1, Char>(it, abs_value, num_digits); + }); } - - void on_oct() { + case 'o': { int num_digits = count_digits<3>(abs_value); if (specs.alt && specs.precision <= num_digits && abs_value != 0) { // Octal prefix '0' is counted as a digit, so only add it if precision // is not greater than the number of digits. 
- prefix[prefix_size++] = '0'; + prefix_append(prefix, '0'); } - out = write_int(out, num_digits, get_prefix(), specs, - [this, num_digits](iterator it) { - return format_uint<3, Char>(it, abs_value, num_digits); - }); + return write_int(out, num_digits, prefix, specs, + [=](reserve_iterator it) { + return format_uint<3, Char>(it, abs_value, num_digits); + }); } - - enum { sep_size = 1 }; - - void on_num() { - std::string groups = grouping(locale); - if (groups.empty()) return on_dec(); - auto sep = thousands_sep(locale); - if (!sep) return on_dec(); - int num_digits = count_digits(abs_value); - int size = num_digits, n = num_digits; - std::string::const_iterator group = groups.cbegin(); - while (group != groups.cend() && n > *group && *group > 0 && - *group != max_value()) { - size += sep_size; - n -= *group; - ++group; - } - if (group == groups.cend()) size += sep_size * ((n - 1) / groups.back()); - char digits[40]; - format_decimal(digits, abs_value, num_digits); - basic_memory_buffer buffer; - size += static_cast(prefix_size); - const auto usize = to_unsigned(size); - buffer.resize(usize); - basic_string_view s(&sep, sep_size); - // Index of a decimal digit with the least significant digit having index 0. 
- int digit_index = 0; - group = groups.cbegin(); - auto p = buffer.data() + size - 1; - for (int i = num_digits - 1; i > 0; --i) { - *p-- = static_cast(digits[i]); - if (*group <= 0 || ++digit_index % *group != 0 || - *group == max_value()) - continue; - if (group + 1 != groups.cend()) { - digit_index = 0; - ++group; - } - std::uninitialized_copy(s.data(), s.data() + s.size(), - make_checked(p, s.size())); - p -= s.size(); - } - *p-- = static_cast(*digits); - if (prefix_size != 0) *p = static_cast('-'); - auto data = buffer.data(); - out = write_padded( - out, specs, usize, usize, - [=](iterator it) { return copy_str(data, data + size, it); }); - } - - void on_chr() { *out++ = static_cast(abs_value); } - - FMT_NORETURN void on_error() { + case 'c': + return write_char(out, static_cast(abs_value), specs); + default: FMT_THROW(format_error("invalid type specifier")); } -}; + return out; +} +template ::value && + !std::is_same::value && + std::is_same>::value)> +FMT_CONSTEXPR auto write(OutputIt out, T value, + const basic_format_specs& specs, locale_ref loc) + -> OutputIt { + return write_int(out, make_write_int_arg(value, specs.sign), specs, loc); +} +// An inlined version of write used in format string compilation. +template ::value && + !std::is_same::value && + !std::is_same>::value)> +FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, + const basic_format_specs& specs, + locale_ref loc) -> OutputIt { + return write_int(out, make_write_int_arg(value, specs.sign), specs, loc); +} template -OutputIt write_nonfinite(OutputIt out, bool isinf, - const basic_format_specs& specs, - const float_specs& fspecs) { +FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, + const basic_format_specs& specs) -> OutputIt { + auto data = s.data(); + auto size = s.size(); + if (specs.precision >= 0 && to_unsigned(specs.precision) < size) + size = code_point_index(s, to_unsigned(specs.precision)); + auto width = + specs.width != 0 ? 
compute_width(basic_string_view(data, size)) : 0; + return write_padded(out, specs, size, width, + [=](reserve_iterator it) { + return copy_str(data, data + size, it); + }); +} +template +FMT_CONSTEXPR auto write(OutputIt out, + basic_string_view> s, + const basic_format_specs& specs, locale_ref) + -> OutputIt { + return write(out, s, specs); +} +template +FMT_CONSTEXPR auto write(OutputIt out, const Char* s, + const basic_format_specs& specs, locale_ref) + -> OutputIt { + return check_cstring_type_spec(specs.type) + ? write(out, basic_string_view(s), specs, {}) + : write_ptr(out, to_uintptr(s), &specs); +} + +template +auto write_nonfinite(OutputIt out, bool isinf, basic_format_specs specs, + const float_specs& fspecs) -> OutputIt { auto str = isinf ? (fspecs.upper ? "INF" : "inf") : (fspecs.upper ? "NAN" : "nan"); constexpr size_t str_size = 3; auto sign = fspecs.sign; auto size = str_size + (sign ? 1 : 0); - using iterator = remove_reference_t; - return write_padded(out, specs, size, [=](iterator it) { + // Replace '0'-padding with space for non-finite values. 
+ const bool is_zero_fill = + specs.fill.size() == 1 && *specs.fill.data() == static_cast('0'); + if (is_zero_fill) specs.fill[0] = static_cast(' '); + return write_padded(out, specs, size, [=](reserve_iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); return copy_str(str, str + str_size, it); }); @@ -1738,74 +1603,76 @@ struct big_decimal_fp { int exponent; }; -inline int get_significand_size(const big_decimal_fp& fp) { +inline auto get_significand_size(const big_decimal_fp& fp) -> int { return fp.significand_size; } template -inline int get_significand_size(const dragonbox::decimal_fp& fp) { +inline auto get_significand_size(const dragonbox::decimal_fp& fp) -> int { return count_digits(fp.significand); } template -inline OutputIt write_significand(OutputIt out, const char* significand, - int& significand_size) { +inline auto write_significand(OutputIt out, const char* significand, + int& significand_size) -> OutputIt { return copy_str(significand, significand + significand_size, out); } template -inline OutputIt write_significand(OutputIt out, UInt significand, - int significand_size) { +inline auto write_significand(OutputIt out, UInt significand, + int significand_size) -> OutputIt { return format_decimal(out, significand, significand_size).end; } template ::value)> -inline Char* write_significand(Char* out, UInt significand, - int significand_size, int integral_size, - Char decimal_point) { +inline auto write_significand(Char* out, UInt significand, int significand_size, + int integral_size, Char decimal_point) -> Char* { if (!decimal_point) return format_decimal(out, significand, significand_size).end; auto end = format_decimal(out + 1, significand, significand_size).end; - if (integral_size == 1) + if (integral_size == 1) { out[0] = out[1]; - else - std::copy_n(out + 1, integral_size, out); + } else { + std::uninitialized_copy_n(out + 1, integral_size, + make_checked(out, to_unsigned(integral_size))); + } out[integral_size] = decimal_point; 
return end; } template >::value)> -inline OutputIt write_significand(OutputIt out, UInt significand, - int significand_size, int integral_size, - Char decimal_point) { +inline auto write_significand(OutputIt out, UInt significand, + int significand_size, int integral_size, + Char decimal_point) -> OutputIt { // Buffer is large enough to hold digits (digits10 + 1) and a decimal point. Char buffer[digits10() + 2]; auto end = write_significand(buffer, significand, significand_size, integral_size, decimal_point); - return detail::copy_str(buffer, end, out); + return detail::copy_str_noinline(buffer, end, out); } template -inline OutputIt write_significand(OutputIt out, const char* significand, - int significand_size, int integral_size, - Char decimal_point) { - out = detail::copy_str(significand, significand + integral_size, out); +inline auto write_significand(OutputIt out, const char* significand, + int significand_size, int integral_size, + Char decimal_point) -> OutputIt { + out = detail::copy_str_noinline(significand, + significand + integral_size, out); if (!decimal_point) return out; *out++ = decimal_point; - return detail::copy_str(significand + integral_size, - significand + significand_size, out); + return detail::copy_str_noinline(significand + integral_size, + significand + significand_size, out); } template -OutputIt write_float(OutputIt out, const DecimalFP& fp, - const basic_format_specs& specs, float_specs fspecs, - Char decimal_point) { +auto write_float(OutputIt out, const DecimalFP& fp, + const basic_format_specs& specs, float_specs fspecs, + Char decimal_point) -> OutputIt { auto significand = fp.significand; int significand_size = get_significand_size(fp); static const Char zero = static_cast('0'); auto sign = fspecs.sign; size_t size = to_unsigned(significand_size) + (sign ? 
1 : 0); - using iterator = remove_reference_t; + using iterator = reserve_iterator; int output_exp = fp.exponent + significand_size - 1; auto use_exp_format = [=]() { @@ -1820,7 +1687,8 @@ OutputIt write_float(OutputIt out, const DecimalFP& fp, if (use_exp_format()) { int num_zeros = 0; if (fspecs.showpoint) { - num_zeros = (std::max)(fspecs.precision - significand_size, 0); + num_zeros = fspecs.precision - significand_size; + if (num_zeros < 0) num_zeros = 0; size += to_unsigned(num_zeros); } else if (significand_size == 1) { decimal_point = Char(); @@ -1836,7 +1704,7 @@ OutputIt write_float(OutputIt out, const DecimalFP& fp, // Insert a decimal point after the first digit and add an exponent. it = write_significand(it, significand, significand_size, 1, decimal_point); - if (num_zeros > 0) it = std::fill_n(it, num_zeros, zero); + if (num_zeros > 0) it = detail::fill_n(it, num_zeros, zero); *it++ = static_cast(exp_char); return write_exponent(output_exp, it); }; @@ -1855,15 +1723,15 @@ OutputIt write_float(OutputIt out, const DecimalFP& fp, #endif if (fspecs.showpoint) { if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1; - if (num_zeros > 0) size += to_unsigned(num_zeros); + if (num_zeros > 0) size += to_unsigned(num_zeros) + 1; } return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); it = write_significand(it, significand, significand_size); - it = std::fill_n(it, fp.exponent, zero); + it = detail::fill_n(it, fp.exponent, zero); if (!fspecs.showpoint) return it; *it++ = decimal_point; - return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it; + return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it; }); } else if (exp > 0) { // 1234e-2 -> 12.34[0+] @@ -1873,7 +1741,7 @@ OutputIt write_float(OutputIt out, const DecimalFP& fp, if (sign) *it++ = static_cast(data::signs[sign]); it = write_significand(it, significand, significand_size, exp, decimal_point); - return num_zeros > 0 ? 
std::fill_n(it, num_zeros, zero) : it; + return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it; }); } // 1234e-6 -> 0.001234 @@ -1882,21 +1750,22 @@ OutputIt write_float(OutputIt out, const DecimalFP& fp, fspecs.precision < num_zeros) { num_zeros = fspecs.precision; } - size += 2 + to_unsigned(num_zeros); + bool pointy = num_zeros != 0 || significand_size != 0 || fspecs.showpoint; + size += 1 + (pointy ? 1 : 0) + to_unsigned(num_zeros); return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = static_cast(data::signs[sign]); *it++ = zero; - if (num_zeros == 0 && significand_size == 0 && !fspecs.showpoint) return it; + if (!pointy) return it; *it++ = decimal_point; - it = std::fill_n(it, num_zeros, zero); + it = detail::fill_n(it, num_zeros, zero); return write_significand(it, significand, significand_size); }); } template ::value)> -OutputIt write(OutputIt out, T value, basic_format_specs specs, - locale_ref loc = {}) { +auto write(OutputIt out, T value, basic_format_specs specs, + locale_ref loc = {}) -> OutputIt { if (const_check(!is_supported_floating_point(value))) return out; float_specs fspecs = parse_float_type_spec(specs); fspecs.sign = specs.sign; @@ -1922,7 +1791,8 @@ OutputIt write(OutputIt out, T value, basic_format_specs specs, if (fspecs.format == float_format::hex) { if (fspecs.sign) buffer.push_back(data::signs[fspecs.sign]); snprintf_float(promote_float(value), specs.precision, fspecs, buffer); - return write_bytes(out, {buffer.data(), buffer.size()}, specs); + return write_bytes(out, {buffer.data(), buffer.size()}, + specs); } int precision = specs.precision >= 0 || !specs.type ? 
specs.precision : 6; if (fspecs.format == float_format::exp) { @@ -1943,7 +1813,7 @@ OutputIt write(OutputIt out, T value, basic_format_specs specs, template ::value)> -OutputIt write(OutputIt out, T value) { +auto write(OutputIt out, T value) -> OutputIt { if (const_check(!is_supported_floating_point(value))) return out; using floaty = conditional_t::value, double, T>; @@ -1969,72 +1839,36 @@ OutputIt write(OutputIt out, T value) { template ::value && !is_fast_float::value)> -inline OutputIt write(OutputIt out, T value) { +inline auto write(OutputIt out, T value) -> OutputIt { return write(out, value, basic_format_specs()); } template -OutputIt write_char(OutputIt out, Char value, - const basic_format_specs& specs) { - using iterator = remove_reference_t; - return write_padded(out, specs, 1, [=](iterator it) { - *it++ = value; - return it; - }); -} - -template -OutputIt write_ptr(OutputIt out, UIntPtr value, - const basic_format_specs* specs) { - int num_digits = count_digits<4>(value); - auto size = to_unsigned(num_digits) + size_t(2); - using iterator = remove_reference_t; - auto write = [=](iterator it) { - *it++ = static_cast('0'); - *it++ = static_cast('x'); - return format_uint<4, Char>(it, value, num_digits); - }; - return specs ? 
write_padded(out, *specs, size, write) - : base_iterator(out, write(reserve(out, size))); -} - -template struct is_integral : std::is_integral {}; -template <> struct is_integral : std::true_type {}; -template <> struct is_integral : std::true_type {}; - -template -OutputIt write(OutputIt out, monostate) { +auto write(OutputIt out, monostate, basic_format_specs = {}, + locale_ref = {}) -> OutputIt { FMT_ASSERT(false, ""); return out; } -template ::value)> -OutputIt write(OutputIt out, string_view value) { - auto it = reserve(out, value.size()); - it = copy_str(value.begin(), value.end(), it); - return base_iterator(out, it); -} - template -OutputIt write(OutputIt out, basic_string_view value) { +FMT_CONSTEXPR auto write(OutputIt out, basic_string_view value) + -> OutputIt { auto it = reserve(out, value.size()); - it = std::copy(value.begin(), value.end(), it); + it = copy_str_noinline(value.begin(), value.end(), it); return base_iterator(out, it); } -template -buffer_appender write(buffer_appender out, - basic_string_view value) { - get_container(out).append(value.begin(), value.end()); - return out; +template ::value)> +constexpr auto write(OutputIt out, const T& value) -> OutputIt { + return write(out, to_string_view(value)); } template ::value && !std::is_same::value && !std::is_same::value)> -OutputIt write(OutputIt out, T value) { +FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { auto abs_value = static_cast>(value); bool negative = is_negative(value); // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. @@ -2052,20 +1886,39 @@ OutputIt write(OutputIt out, T value) { return base_iterator(out, it); } -template -OutputIt write(OutputIt out, bool value) { - return write(out, string_view(value ? 
"true" : "false")); +// FMT_ENABLE_IF() condition separated to workaround MSVC bug +template < + typename Char, typename OutputIt, typename T, + bool check = + std::is_enum::value && !std::is_same::value && + mapped_type_constant>::value != + type::custom_type, + FMT_ENABLE_IF(check)> +FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { + return write( + out, static_cast::type>(value)); +} + +template ::value)> +FMT_CONSTEXPR auto write(OutputIt out, T value, + const basic_format_specs& specs = {}, + locale_ref = {}) -> OutputIt { + return specs.type && specs.type != 's' + ? write(out, value ? 1 : 0, specs, {}) + : write_bytes(out, value ? "true" : "false", specs); } template -OutputIt write(OutputIt out, Char value) { +FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { auto it = reserve(out, 1); *it++ = value; return base_iterator(out, it); } template -OutputIt write(OutputIt out, const Char* value) { +FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) + -> OutputIt { if (!value) { FMT_THROW(format_error("string pointer is null")); } else { @@ -2075,16 +1928,21 @@ OutputIt write(OutputIt out, const Char* value) { return out; } -template -OutputIt write(OutputIt out, const void* value) { - return write_ptr(out, to_uintptr(value), nullptr); +template ::value)> +auto write(OutputIt out, const T* value, + const basic_format_specs& specs = {}, locale_ref = {}) + -> OutputIt { + check_pointer_type_spec(specs.type, error_handler()); + return write_ptr(out, to_uintptr(value), &specs); } template -auto write(OutputIt out, const T& value) -> typename std::enable_if< - mapped_type_constant>::value == - type::custom_type, - OutputIt>::type { +FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> + typename std::enable_if< + mapped_type_constant>::value == + type::custom_type, + OutputIt>::type { using context_type = basic_format_context; using formatter_type = conditional_t::value, @@ -2096,292 +1954,52 @@ auto write(OutputIt 
out, const T& value) -> typename std::enable_if< // An argument visitor that formats the argument and writes it via the output // iterator. It's a class and not a generic lambda for compatibility with C++11. -template struct default_arg_formatter { - using context = basic_format_context; +template struct default_arg_formatter { + using iterator = buffer_appender; + using context = buffer_context; - OutputIt out; + iterator out; basic_format_args args; locale_ref loc; - template OutputIt operator()(T value) { + template auto operator()(T value) -> iterator { return write(out, value); } - - OutputIt operator()(typename basic_format_arg::handle handle) { + auto operator()(typename basic_format_arg::handle h) -> iterator { basic_format_parse_context parse_ctx({}); - basic_format_context format_ctx(out, args, loc); - handle.format(parse_ctx, format_ctx); + context format_ctx(out, args, loc); + h.format(parse_ctx, format_ctx); return format_ctx.out(); } }; -template -class arg_formatter_base { - public: - using iterator = OutputIt; - using char_type = Char; - using format_specs = basic_format_specs; +template struct arg_formatter { + using iterator = buffer_appender; + using context = buffer_context; - private: - iterator out_; - locale_ref locale_; - format_specs* specs_; + iterator out; + const basic_format_specs& specs; + locale_ref locale; - // Attempts to reserve space for n extra characters in the output range. - // Returns a pointer to the reserved range or a reference to out_. 
- auto reserve(size_t n) -> decltype(detail::reserve(out_, n)) { - return detail::reserve(out_, n); + template + FMT_CONSTEXPR FMT_INLINE auto operator()(T value) -> iterator { + return detail::write(out, value, specs, locale); } - - using reserve_iterator = remove_reference_t(), 0))>; - - template void write_int(T value, const format_specs& spec) { - using uint_type = uint32_or_64_or_128_t; - int_writer w(out_, locale_, value, spec); - handle_int_type_spec(spec.type, w); - out_ = w.out; - } - - void write(char value) { - auto&& it = reserve(1); - *it++ = value; - } - - template ::value)> - void write(Ch value) { - out_ = detail::write(out_, value); - } - - void write(string_view value) { - auto&& it = reserve(value.size()); - it = copy_str(value.begin(), value.end(), it); - } - void write(wstring_view value) { - static_assert(std::is_same::value, ""); - auto&& it = reserve(value.size()); - it = std::copy(value.begin(), value.end(), it); - } - - template - void write(const Ch* s, size_t size, const format_specs& specs) { - auto width = specs.width != 0 - ? count_code_points(basic_string_view(s, size)) - : 0; - out_ = write_padded(out_, specs, size, width, [=](reserve_iterator it) { - return copy_str(s, s + size, it); - }); - } - - template - void write(basic_string_view s, const format_specs& specs = {}) { - out_ = detail::write(out_, s, specs); - } - - void write_pointer(const void* p) { - out_ = write_ptr(out_, to_uintptr(p), specs_); - } - - struct char_spec_handler : ErrorHandler { - arg_formatter_base& formatter; - Char value; - - char_spec_handler(arg_formatter_base& f, Char val) - : formatter(f), value(val) {} - - void on_int() { - // char is only formatted as int if there are specs. 
- formatter.write_int(static_cast(value), *formatter.specs_); - } - void on_char() { - if (formatter.specs_) - formatter.out_ = write_char(formatter.out_, value, *formatter.specs_); - else - formatter.write(value); - } - }; - - struct cstring_spec_handler : error_handler { - arg_formatter_base& formatter; - const Char* value; - - cstring_spec_handler(arg_formatter_base& f, const Char* val) - : formatter(f), value(val) {} - - void on_string() { formatter.write(value); } - void on_pointer() { formatter.write_pointer(value); } - }; - - protected: - iterator out() { return out_; } - format_specs* specs() { return specs_; } - - void write(bool value) { - if (specs_) - write(string_view(value ? "true" : "false"), *specs_); - else - out_ = detail::write(out_, value); - } - - void write(const Char* value) { - if (!value) { - FMT_THROW(format_error("string pointer is null")); - } else { - auto length = std::char_traits::length(value); - basic_string_view sv(value, length); - specs_ ? write(sv, *specs_) : write(sv); - } - } - - public: - arg_formatter_base(OutputIt out, format_specs* s, locale_ref loc) - : out_(out), locale_(loc), specs_(s) {} - - iterator operator()(monostate) { - FMT_ASSERT(false, "invalid argument type"); - return out_; - } - - template ::value)> - FMT_INLINE iterator operator()(T value) { - if (specs_) - write_int(value, *specs_); - else - out_ = detail::write(out_, value); - return out_; - } - - iterator operator()(Char value) { - handle_char_specs(specs_, - char_spec_handler(*this, static_cast(value))); - return out_; - } - - iterator operator()(bool value) { - if (specs_ && specs_->type) return (*this)(value ? 1 : 0); - write(value != 0); - return out_; - } - - template ::value)> - iterator operator()(T value) { - auto specs = specs_ ? 
*specs_ : format_specs(); - if (const_check(is_supported_floating_point(value))) - out_ = detail::write(out_, value, specs, locale_); - else - FMT_ASSERT(false, "unsupported float argument type"); - return out_; - } - - iterator operator()(const Char* value) { - if (!specs_) return write(value), out_; - handle_cstring_type_spec(specs_->type, cstring_spec_handler(*this, value)); - return out_; - } - - iterator operator()(basic_string_view value) { - if (specs_) { - check_string_type_spec(specs_->type, error_handler()); - write(value, *specs_); - } else { - write(value); - } - return out_; - } - - iterator operator()(const void* value) { - if (specs_) check_pointer_type_spec(specs_->type, error_handler()); - write_pointer(value); - return out_; + auto operator()(typename basic_format_arg::handle) -> iterator { + // User-defined types are handled separately because they require access + // to the parse context. + return out; } }; -/** The default argument formatter. */ -template -class arg_formatter : public arg_formatter_base { - private: - using char_type = Char; - using base = arg_formatter_base; - using context_type = basic_format_context; +template struct custom_formatter { + basic_format_parse_context& parse_ctx; + buffer_context& ctx; - context_type& ctx_; - basic_format_parse_context* parse_ctx_; - const Char* ptr_; - - public: - using iterator = typename base::iterator; - using format_specs = typename base::format_specs; - - /** - \rst - Constructs an argument formatter object. - *ctx* is a reference to the formatting context, - *specs* contains format specifier information for standard argument types. - \endrst - */ - explicit arg_formatter( - context_type& ctx, - basic_format_parse_context* parse_ctx = nullptr, - format_specs* specs = nullptr, const Char* ptr = nullptr) - : base(ctx.out(), specs, ctx.locale()), - ctx_(ctx), - parse_ctx_(parse_ctx), - ptr_(ptr) {} - - using base::operator(); - - /** Formats an argument of a user-defined type. 
*/ - iterator operator()(typename basic_format_arg::handle handle) { - if (ptr_) advance_to(*parse_ctx_, ptr_); - handle.format(*parse_ctx_, ctx_); - return ctx_.out(); + void operator()( + typename basic_format_arg>::handle h) const { + h.format(parse_ctx, ctx); } -}; - -template FMT_CONSTEXPR bool is_name_start(Char c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; -} - -// Parses the range [begin, end) as an unsigned integer. This function assumes -// that the range is non-empty and the first character is a digit. -template -FMT_CONSTEXPR int parse_nonnegative_int(const Char*& begin, const Char* end, - ErrorHandler&& eh) { - FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); - unsigned value = 0; - // Convert to unsigned to prevent a warning. - constexpr unsigned max_int = max_value(); - unsigned big = max_int / 10; - do { - // Check for overflow. - if (value > big) { - value = max_int + 1; - break; - } - value = value * 10 + unsigned(*begin - '0'); - ++begin; - } while (begin != end && '0' <= *begin && *begin <= '9'); - if (value > max_int) eh.on_error("number is too big"); - return static_cast(value); -} - -template class custom_formatter { - private: - using char_type = typename Context::char_type; - - basic_format_parse_context& parse_ctx_; - Context& ctx_; - - public: - explicit custom_formatter(basic_format_parse_context& parse_ctx, - Context& ctx) - : parse_ctx_(parse_ctx), ctx_(ctx) {} - - void operator()(typename basic_format_arg::handle h) const { - h.format(parse_ctx_, ctx_); - } - template void operator()(T) const {} }; @@ -2396,13 +2014,13 @@ template class width_checker { explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T value) { + FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { if (is_negative(value)) handler_.on_error("negative width"); return static_cast(value); } template ::value)> - FMT_CONSTEXPR 
unsigned long long operator()(T) { + FMT_CONSTEXPR auto operator()(T) -> unsigned long long { handler_.on_error("width is not integer"); return 0; } @@ -2416,13 +2034,13 @@ template class precision_checker { explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T value) { + FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { if (is_negative(value)) handler_.on_error("negative precision"); return static_cast(value); } template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T) { + FMT_CONSTEXPR auto operator()(T) -> unsigned long long { handler_.on_error("precision is not integer"); return 0; } @@ -2431,148 +2049,50 @@ template class precision_checker { ErrorHandler& handler_; }; -// A format specifier handler that sets fields in basic_format_specs. -template class specs_setter { - public: - explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs) - : specs_(specs) {} - - FMT_CONSTEXPR specs_setter(const specs_setter& other) - : specs_(other.specs_) {} - - FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; } - FMT_CONSTEXPR void on_fill(basic_string_view fill) { - specs_.fill = fill; - } - FMT_CONSTEXPR void on_plus() { specs_.sign = sign::plus; } - FMT_CONSTEXPR void on_minus() { specs_.sign = sign::minus; } - FMT_CONSTEXPR void on_space() { specs_.sign = sign::space; } - FMT_CONSTEXPR void on_hash() { specs_.alt = true; } - - FMT_CONSTEXPR void on_zero() { - specs_.align = align::numeric; - specs_.fill[0] = Char('0'); - } - - FMT_CONSTEXPR void on_width(int width) { specs_.width = width; } - FMT_CONSTEXPR void on_precision(int precision) { - specs_.precision = precision; - } - FMT_CONSTEXPR void end_precision() {} - - FMT_CONSTEXPR void on_type(Char type) { - specs_.type = static_cast(type); - } - - protected: - basic_format_specs& specs_; -}; - -template class numeric_specs_checker { - public: - FMT_CONSTEXPR 
numeric_specs_checker(ErrorHandler& eh, detail::type arg_type) - : error_handler_(eh), arg_type_(arg_type) {} - - FMT_CONSTEXPR void require_numeric_argument() { - if (!is_arithmetic_type(arg_type_)) - error_handler_.on_error("format specifier requires numeric argument"); - } - - FMT_CONSTEXPR void check_sign() { - require_numeric_argument(); - if (is_integral_type(arg_type_) && arg_type_ != type::int_type && - arg_type_ != type::long_long_type && arg_type_ != type::char_type) { - error_handler_.on_error("format specifier requires signed argument"); - } - } - - FMT_CONSTEXPR void check_precision() { - if (is_integral_type(arg_type_) || arg_type_ == type::pointer_type) - error_handler_.on_error("precision not allowed for this argument type"); - } - - private: - ErrorHandler& error_handler_; - detail::type arg_type_; -}; - -// A format specifier handler that checks if specifiers are consistent with the -// argument type. -template class specs_checker : public Handler { - private: - numeric_specs_checker checker_; - - // Suppress an MSVC warning about using this in initializer list. 
- FMT_CONSTEXPR Handler& error_handler() { return *this; } - - public: - FMT_CONSTEXPR specs_checker(const Handler& handler, detail::type arg_type) - : Handler(handler), checker_(error_handler(), arg_type) {} - - FMT_CONSTEXPR specs_checker(const specs_checker& other) - : Handler(other), checker_(error_handler(), other.arg_type_) {} - - FMT_CONSTEXPR void on_align(align_t align) { - if (align == align::numeric) checker_.require_numeric_argument(); - Handler::on_align(align); - } - - FMT_CONSTEXPR void on_plus() { - checker_.check_sign(); - Handler::on_plus(); - } - - FMT_CONSTEXPR void on_minus() { - checker_.check_sign(); - Handler::on_minus(); - } - - FMT_CONSTEXPR void on_space() { - checker_.check_sign(); - Handler::on_space(); - } - - FMT_CONSTEXPR void on_hash() { - checker_.require_numeric_argument(); - Handler::on_hash(); - } - - FMT_CONSTEXPR void on_zero() { - checker_.require_numeric_argument(); - Handler::on_zero(); - } - - FMT_CONSTEXPR void end_precision() { checker_.check_precision(); } -}; - template