/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/ Sandia National Laboratories LAMMPS development team: developers@lammps.org Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Ngoc Cuong Nguyen (MIT) and Andrew Rohskopf (SNL) ------------------------------------------------------------------------- */ #include "fitpod_command.h" #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" #include "memory.h" #include "modify.h" #include "neigh_list.h" #include "neighbor.h" #include "pair.h" #include "pod.h" #include "tokenizer.h" #include "update.h" #include "math_const.h" #include #include #include #include #include #include using namespace LAMMPS_NS; #define MAXLINE 1024 std::vector static globVector(const std::string& pattern, std::vector & files) { glob_t glob_result; glob(pattern.c_str(),GLOB_TILDE,NULL,&glob_result); for(unsigned int i=0;iall(FLERR,"Illegal fitpod command"); std::string pod_file = std::string(arg[0]); // pod input file std::string data_file = std::string(arg[1]); // data input file std::string coeff_file; // coefficient input file if (narg > 2) coeff_file = std::string(arg[2]); // coefficient input file else coeff_file = ""; podptr = new CPOD(lmp, pod_file, coeff_file); read_data_files(data_file, podptr->pod.species); if ((int) traindata.data_path.size() > 1) allocate_memory(traindata); else if ((int) testdata.data_path.size() > 1) allocate_memory(testdata); // get POD coefficients from an input file if (coeff_file != "") podptr->podArrayCopy(desc.c, podptr->pod.coeff, podptr->pod.nd); // compute POD coefficients using least-squares method least_squares_fit(traindata); // calculate errors for the training data set if ((traindata.training_analysis) && ((int) traindata.data_path.size() > 1) ) error_analysis(traindata, desc.c); // calculate errors for the test data set if ((testdata.test_analysis) && ((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path)) error_analysis(testdata, desc.c); // calculate energy and force for the training data set if ((traindata.training_calculation) && ((int) traindata.data_path.size() > 1) ) energyforce_calculation(traindata, desc.c); // calculate energy and force for the test data set if ((testdata.test_calculation) && ((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path) ) energyforce_calculation(testdata, desc.c); // deallocate training data if ((int) traindata.data_path.size() > 1){ memory->destroy(traindata.lattice); memory->destroy(traindata.energy); memory->destroy(traindata.stress); memory->destroy(traindata.position); memory->destroy(traindata.force); memory->destroy(traindata.atomtype); } // deallocate testing data if ((int) testdata.data_path.size() > 1 && (testdata.data_path != traindata.data_path)){ memory->destroy(testdata.lattice); memory->destroy(testdata.energy); memory->destroy(testdata.stress); memory->destroy(testdata.position); memory->destroy(testdata.force); memory->destroy(testdata.atomtype); } // deallocate descriptors memory->destroy(desc.gd); memory->destroy(desc.gdd); memory->destroy(desc.A); memory->destroy(desc.b); memory->destroy(desc.c); memory->destroy(desc.tmpint); // deallocate neighbor data memory->destroy(nb.alist); memory->destroy(nb.pairnum); memory->destroy(nb.pairnum_cumsum); memory->destroy(nb.pairlist); memory->destroy(nb.y); } /* ---------------------------------------------------------------------- */ void CFITPOD::read_data_file(double *fitting_weights, std::string &file_format, std::string &file_extension, std::string &test_path, std::string &training_path, std::string data_file) { std::string datafilename = data_file; FILE *fpdata; if (comm->me == 0){ fpdata = utils::open_potential(datafilename,lmp,nullptr); if (fpdata == nullptr) error->one(FLERR,"Cannot open training data file {}: ", datafilename, utils::getsyserror()); } // loop through lines of training data file and parse keywords char line[MAXLINE],*ptr; int eof = 0; while (true) { if (comm->me == 0) { ptr = fgets(line,MAXLINE,fpdata); if (ptr == nullptr) { eof = 1; fclose(fpdata); } } MPI_Bcast(&eof,1,MPI_INT,0,world); if (eof) break; MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); // words = ptrs to all words in line // strip single and double quotes from words std::vector words; try { words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } if (words.size() == 0) continue; auto keywd = words[0]; if (words.size() != 2) error->one(FLERR,"Improper POD file.", utils::getsyserror()); // settings for fitting weights if (keywd == "fitting_weight_energy") fitting_weights[0] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "fitting_weight_force") fitting_weights[1] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "fitting_weight_stress") fitting_weights[2] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "error_analysis_for_training_data_set") fitting_weights[3] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "error_analysis_for_test_data_set") fitting_weights[4] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "energy_force_calculation_for_training_data_set") fitting_weights[5] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "energy_force_calculation_for_test_data_set") fitting_weights[6] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "percentage_training_data_set") fitting_weights[7] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "percentage_test_data_set") fitting_weights[8] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "randomize_training_data_set") fitting_weights[9] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "randomize_test_data_set") fitting_weights[10] = utils::numeric(FLERR,words[1],false,lmp); // other settings if (keywd == "file_format") file_format = words[1]; if (keywd == "file_extension") file_extension = words[1]; if (keywd == "path_to_training_data_set") training_path = words[1]; if (keywd == "path_to_test_data_set") test_path = words[1]; } utils::logmesg(lmp, "**************** Begin of Data File ****************\n"); utils::logmesg(lmp, "file format: {}\n", file_format); utils::logmesg(lmp, "file extension: {}\n", file_extension); utils::logmesg(lmp, "path to training data set: {}\n", training_path); utils::logmesg(lmp, "path to test data set: {}\n", test_path); utils::logmesg(lmp, "training percentage: {}\n", fitting_weights[7]); utils::logmesg(lmp, "test percentage: {}\n", fitting_weights[8]); utils::logmesg(lmp, "randomize training data set: {}\n", fitting_weights[9]); utils::logmesg(lmp, "randomize test data set: {}\n", fitting_weights[10]); utils::logmesg(lmp, "error analysis for training data set: {}\n", fitting_weights[3]); utils::logmesg(lmp, "error analysis for test data set: {}\n", fitting_weights[4]); utils::logmesg(lmp, "energy/force calculation for training data set: {}\n", fitting_weights[5]); utils::logmesg(lmp, "energy/force calculation for test data set: {}\n", fitting_weights[6]); utils::logmesg(lmp, "fitting weight for energy: {}\n", fitting_weights[0]); utils::logmesg(lmp, "fitting weight for force: {}\n", fitting_weights[1]); utils::logmesg(lmp, "fitting weight for stress: {}\n", fitting_weights[2]); utils::logmesg(lmp, "**************** End of Data File ****************\n"); } void CFITPOD::get_exyz_files(std::vector& files, std::string datapath, std::string extension) { std::vector res = globVector(datapath + "/*." + extension, files); } int CFITPOD::get_number_atom_exyz(std::vector& num_atom, int& num_atom_sum, std::string file) { std::string filename = file; FILE *fp; if (comm->me == 0){ fp = utils::open_potential(filename,lmp,nullptr); if (fp == nullptr) error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); } char line[MAXLINE],*ptr; int eof = 0; int num_configs = 0; num_atom_sum = 0; // loop over all lines of this xyz file and extract number of atoms and number of configs while (true) { if (comm->me == 0) { ptr = fgets(line,MAXLINE,fp); if (ptr == nullptr) { eof = 1; fclose(fp); } } MPI_Bcast(&eof,1,MPI_INT,0,world); if (eof) break; MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); // words = ptrs to all words in line // strip single and double quotes from words std::vector words; try { words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } if (words.size() == 0) continue; int natom; if (words.size() == 1){ natom = utils::inumeric(FLERR,words[0],false,lmp); num_atom.push_back(natom); num_configs += 1; num_atom_sum += natom; } } return num_configs; } int CFITPOD::get_number_atoms(std::vector& num_atom, std::vector &num_atom_sum, std::vector& num_config, std::vector training_files) { int nfiles = training_files.size(); // number of files int d, n; for (int i=0; i species) { std::string filename = file; FILE *fp; if (comm->me == 0){ fp = utils::open_potential(filename,lmp,nullptr); if (fp == nullptr) error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); } char line[MAXLINE],*ptr; int eof = 0; int cfi = 0; int nat = 0; int ns = species.size(); // loop over all lines of this xyz file and extract training data while (true) { if (comm->me == 0) { ptr = fgets(line,MAXLINE,fp); if (ptr == nullptr) { eof = 1; fclose(fp); } } MPI_Bcast(&eof,1,MPI_INT,0,world); if (eof) break; MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); // words = ptrs to all words in line // strip single and double quotes from words std::vector words; try { words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } if (words.size() == 0) continue; ValueTokenizer text(utils::trim_comment(line),"\"' \t\n\r\f"); if (text.contains("attice")){ // find the word containing "lattice" auto it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("attice") != std::string::npos; }); // get index of element from iterator int index = std::distance(words.begin(), it); if (words[index].find("=") != std::string::npos) { // lattice numbers start at index + 1 for (int k = 0; k < 9; k++){ lattice[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp); } } else{ // lattice numbers start at index + 2 for (int k = 0; k < 9; k++){ lattice[k + 9*cfi] = utils::numeric(FLERR,words[index+2+k],false,lmp); } } // find the word containing "energy" it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("nergy") != std::string::npos; }); // get index of element from iterator index = std::distance(words.begin(), it); if (words[index].find("=") != std::string::npos) { // energy is after "=" inside this string std::size_t found = words[index].find("="); energy[cfi] = utils::numeric(FLERR,words[index].substr(found+1),false,lmp); } else{ // energy is at index + 2 energy[cfi] = utils::numeric(FLERR,words[index+2],false,lmp); } // find the word containing "stress" it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("tress") != std::string::npos; }); // get index of element from iterator index = std::distance(words.begin(), it); if (words[index].find("=") != std::string::npos) { // lattice numbers start at index + 1 for (int k = 0; k < 9; k++){ stress[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp); } } else{ // lattice numbers start at index + 2 for (int k = 0; k < 9; k++){ stress[k + 9*cfi] = utils::numeric(FLERR,words[index+2+k],false,lmp); } } cfi += 1; } // loop over atoms else if (words.size() > 1){ for (int ii = 0; ii < ns; ii++) if (species[ii] == words[0]) atomtype[nat] = ii+1; for (int k = 0; k < 6; k++){ if (k <= 2) pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); if (k > 2 ) forces[k-3 + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); } nat += 1; } } } void CFITPOD::get_data(datastruct &data, std::vector species) { get_exyz_files(data.data_files, data.data_path, data.file_extension); data.num_atom_sum = get_number_atoms(data.num_atom, data.num_atom_each_file, data.num_config, data.data_files); data.num_config_sum = data.num_atom.size(); size_t maxname = 9; for (auto fname : data.data_files) maxname = MAX(maxname,fname.size()); maxname -= data.data_path.size()+1; utils::logmesg(lmp, " {:^{}} | number of configurations | number of atoms\n{:=<{}}\n", "data file", maxname, "", maxname+46); int i = 0; for (auto fname : data.data_files) { std::string filename = fname.substr(data.data_path.size()+1); data.filenames.push_back(filename); utils::logmesg(lmp, " {:<{}} | {:>10} | {:>8}\n", filename, maxname, data.num_config[i], data.num_atom_each_file[i]); ++i; } utils::logmesg(lmp, "{:=<{}}\n", "", maxname+46); utils::logmesg(lmp, "number of files: {}\n", data.data_files.size()); utils::logmesg(lmp, "number of configurations in all files: {}\n", data.num_config_sum); utils::logmesg(lmp, "number of atoms in all files: {}\n", data.num_atom_sum); int n = data.num_config_sum; memory->create(data.lattice, 9*n, "fitpod:lattice"); memory->create(data.stress, 9*n, "fitpod:stress"); memory->create(data.energy, n, "fitpod:energy"); n = data.num_atom_sum; memory->create(data.position, 3*n, "fitpod:position"); memory->create(data.force, 3*n, "fitpod:force"); memory->create(data.atomtype, n, "fitpod:atomtype"); int nfiles = data.data_files.size(); // number of files int nconfigs = 0; int natoms = 0; for (int i=0; ipodArrayMin(&data.num_atom[0], len); data.num_atom_max = podptr->podArrayMax(&data.num_atom[0], len); data.num_atom_cumsum.resize(len+1); podptr->podCumsum(&data.num_atom_cumsum[0], &data.num_atom[0], len+1); data.num_config_cumsum.resize(nfiles+1); podptr->podCumsum(&data.num_config_cumsum[0], &data.num_config[0], nfiles+1); // convert all structures to triclinic system constexpr int DIM = 3; double Qmat[DIM*DIM]; for (int ci=0; cimatrix33_inverse(Qmat, a1, a2, a3); podptr->triclinic_lattice_conversion(a1, a2, a3, a1, a2, a3); podptr->matrix33_multiplication(Qmat, lattice, Qmat, DIM); podptr->matrix33_multiplication(x, Qmat, x, natom); podptr->matrix33_multiplication(f, Qmat, f, natom); } utils::logmesg(lmp, "minimum number of atoms: {}\n", data.num_atom_min); utils::logmesg(lmp, "maximum number of atoms: {}\n", data.num_atom_max); } std::vector CFITPOD::linspace(int start_in, int end_in, int num_in) { std::vector linspaced; double start = static_cast(start_in); double end = static_cast(end_in); double num = static_cast(num_in); int elm; if (num == 0) { return linspaced; } if (num == 1) { elm = (int) std::round(start); linspaced.push_back(elm); return linspaced; } double delta = (end - start) / (num - 1); for(int i=0; i < num-1; ++i) { elm = (int) std::round(start + delta * i); linspaced.push_back(elm); } elm = (int) std::round(end); linspaced.push_back(elm); return linspaced; } std::vector CFITPOD::shuffle(int start_in, int end_in, int num_in) { int sz = end_in - start_in + 1; std::vector myvector(sz); for (int i = 0; i shuffle_vec(num_in); for (int i = 0; i CFITPOD::select(int n, double percentage, int randomize) { std::vector selected; int m = (int) std::round(n*percentage); m = MAX(m, 1); selected = (randomize==1) ? shuffle(1, n, m) : linspace(1, n, m); return selected; } void CFITPOD::select_data(datastruct &newdata, datastruct data) { double percentage = data.percentage; int randomize = data.randomize; if (randomize==1) utils::logmesg(lmp, "Select {} percent of the data set at random using shuffle\n", data.percentage*100); else utils::logmesg(lmp, "Select {} percent of the data set deterministically using linspace\n", data.percentage*100); int nfiles = data.data_files.size(); // number of files std::vector> selected(nfiles); newdata.num_config.resize(nfiles); newdata.num_config_cumsum.resize(nfiles+1); newdata.num_atom_each_file.resize(nfiles); for (int file = 0; file < nfiles; file++) { int nconfigs = data.num_config[file]; selected[file] = select(nconfigs, percentage, randomize); int ns = (int) selected[file].size(); // number of selected configurations newdata.num_config[file] = ns; int num_atom_sum = 0; for (int ii=0; ii < ns; ii++) { // loop over each selected configuration in a file int ci = data.num_config_cumsum[file] + selected[file][ii] - 1; int natom = data.num_atom[ci]; newdata.num_atom.push_back(natom); num_atom_sum += natom; } newdata.num_atom_each_file[file] = num_atom_sum; } int len = newdata.num_atom.size(); newdata.num_atom_min = podptr->podArrayMin(&newdata.num_atom[0], len); newdata.num_atom_max = podptr->podArrayMax(&newdata.num_atom[0], len); newdata.num_atom_cumsum.resize(len+1); podptr->podCumsum(&newdata.num_atom_cumsum[0], &newdata.num_atom[0], len+1); newdata.num_atom_sum = newdata.num_atom_cumsum[len]; podptr->podCumsum(&newdata.num_config_cumsum[0], &newdata.num_config[0], nfiles+1); newdata.num_config_sum = newdata.num_atom.size(); int n = data.num_config_sum; memory->create(newdata.lattice, 9*n, "fitpod:newdata_lattice"); memory->create(newdata.stress, 9*n, "fitpod:newdata_stress"); memory->create(newdata.energy, n, "fitpod:newdata_energy"); n = data.num_atom_sum; memory->create(newdata.position, 3*n, "fitpod:newdata_position"); memory->create(newdata.force, 3*n, "fitpod:newdata_force"); memory->create(newdata.atomtype, n, "fitpod:newdata_atomtype"); int cn = 0, dim=3; for (int file = 0; file < nfiles; file++) { int ns = (int) selected[file].size(); // number of selected configurations for (int ii=0; ii < ns; ii++) { // loop over each selected configuration in a file int ci = data.num_config_cumsum[file] + selected[file][ii] - 1; int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int natomnew = newdata.num_atom[cn]; int natomnew_cumsum = newdata.num_atom_cumsum[cn]; if (natom != natomnew) error->all(FLERR,"number of atoms in the new data set must be the same as that in the old data set."); int *atomtype = &data.atomtype[natom_cumsum]; double *position = &data.position[dim*natom_cumsum]; double *force = &data.force[dim*natom_cumsum]; newdata.energy[cn] = data.energy[ci]; for (int j=0; j<9; j++) { newdata.stress[j+9*cn] = data.stress[j+9*ci]; newdata.lattice[j+9*cn] = data.lattice[j+9*ci]; } for (int na=0; na species) { datastruct data; // read data input file to datastruct read_data_file(data.fitting_weights, data.file_format, data.file_extension, testdata.data_path, data.data_path, data_file); data.training_analysis = (int) data.fitting_weights[3]; data.test_analysis = (int) data.fitting_weights[4]; data.training_calculation = (int) data.fitting_weights[5]; data.test_calculation = (int) data.fitting_weights[6]; data.percentage = data.fitting_weights[7]; data.randomize = (int) data.fitting_weights[9]; data.copydatainfo(traindata); if (data.percentage >= 1.0) { utils::logmesg(lmp, "**************** Begin of Training Data Set ****************\n"); if ((int) traindata.data_path.size() > 1) get_data(traindata, species); else error->all(FLERR,"data set is not found"); utils::logmesg(lmp, "**************** End of Training Data Set ****************\n"); } else { utils::logmesg(lmp, "**************** Begin of Training Data Set ****************\n"); if ((int) data.data_path.size() > 1) get_data(data, species); else error->all(FLERR,"data set is not found"); utils::logmesg(lmp, "**************** End of Training Data Set ****************\n"); utils::logmesg(lmp, "**************** Begin of Select Training Data Set ****************\n"); select_data(traindata, data); utils::logmesg(lmp, "**************** End of Select Training Data Set ****************\n"); memory->destroy(data.lattice); memory->destroy(data.energy); memory->destroy(data.stress); memory->destroy(data.position); memory->destroy(data.force); memory->destroy(data.atomtype); } if (((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path)) { testdata.training = 0; testdata.file_format = traindata.file_format; testdata.file_extension = traindata.file_extension; testdata.training_analysis = traindata.training_analysis; testdata.test_analysis = traindata.test_analysis; testdata.training_calculation = traindata.training_calculation; testdata.test_calculation = traindata.test_calculation; testdata.percentage = traindata.fitting_weights[8]; testdata.randomize = (int) traindata.fitting_weights[10]; utils::logmesg(lmp, "**************** Begin of Test Data Set ****************\n"); get_data(testdata, species); utils::logmesg(lmp, "**************** End of Test Data Set ****************\n"); } else { testdata.data_path = traindata.data_path; } } int CFITPOD::latticecoords(double *y, int *alist, double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx) { int m=0, n=0, p=0; if (pbc[0] == 1) m = (int) ceil(rcut/a1[0]); if (pbc[1] == 1) n = (int) ceil(rcut/a2[1]); if (pbc[2] == 1) p = (int) ceil(rcut/a3[2]); // index for the center lattice int ind = m + (2*m+1)*(n) + (2*m+1)*(2*n+1)*(p); // number of lattices int nl = (2*m+1)*(2*n+1)*(2*p+1); for (int j=0; j<3*nx; j++) y[j] = x[j]; int q = nx; for (int i = 0; i < (2*p+1); i++) for (int j = 0; j < (2*n+1); j++) for (int k = 0; k < (2*m+1); k++) { int ii = k + (2*m+1)*j + (2*m+1)*(2*n+1)*i; if (ii != ind) { double x0 = a1[0]*(k - m) + a2[0]*(j - n) + a3[0]*(i - p); double x1 = a1[1]*(k - m) + a2[1]*(j - n) + a3[1]*(i - p); double x2 = a1[2]*(k - m) + a2[2]*(j - n) + a3[2]*(i - p); for (int jj=0; jj 1e-12) && (rijsq <= rcutsq)) { inc += 1; neighlist[k] = j; k += 1; } } numneigh[i] = inc; } return k; } int CFITPOD::podfullneighborlist(double *y, int *alist, int *neighlist, int *numneigh, int *numneighsum, double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx) { double rcutsq = rcut*rcut; int dim = 3, nl = 0, nn = 0; // number of lattices nl = latticecoords(y, alist, x, a1, a2, a3, rcut, pbc, nx); int N = nx*nl; // total number of neighbors nn = podneighborlist(neighlist, numneigh, y, rcutsq, nx, N, dim); podptr->podCumsum(numneighsum, numneigh, nx+1); return nn; } void CFITPOD::allocate_memory(datastruct data) { int nd = podptr->pod.nd; memory->create(desc.gd, nd, "fitpod:desc_gd"); memory->create(desc.A, nd*nd, "fitpod:desc_A"); memory->create(desc.b, nd, "fitpod:desc_b"); memory->create(desc.c, nd, "fitpod:desc_c"); podptr->podArraySetValue(desc.A, 0.0, nd*nd); podptr->podArraySetValue(desc.b, 0.0, nd); podptr->podArraySetValue(desc.c, 0.0, nd); int dim = 3; int natom_max = data.num_atom_max; int nd1 = podptr->pod.nd1; int nd2 = podptr->pod.nd2; int nd3 = podptr->pod.nd3; int nd4 = podptr->pod.nd4; int nelements = podptr->pod.nelements; int nbesselpars = podptr->pod.nbesselpars; int nrbf2 = podptr->pod.nbf2; int nabf3 = podptr->pod.nabf3; int nrbf3 = podptr->pod.nrbf3; int *pdegree2 = podptr->pod.twobody; int *pdegree3 = podptr->pod.threebody; int *pbc = podptr->pod.pbc; double rcut = podptr->pod.rcut; int Nj=0, Nij=0; int m=0, n=0, p=0, nl=0, ny=0, na=0, np=0; for (int ci=0; ci<(int) data.num_atom.size(); ci++) { int natom = data.num_atom[ci]; double *lattice = &data.lattice[9*ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; if (pbc[0] == 1) m = (int) ceil(rcut/a1[0]); if (pbc[1] == 1) n = (int) ceil(rcut/a2[1]); if (pbc[2] == 1) p = (int) ceil(rcut/a3[2]); // number of lattices nl = (2*m+1)*(2*n+1)*(2*p+1); ny = MAX(ny,dim*natom*nl); na = MAX(na, natom*nl); np = MAX(np, natom*natom*nl); } memory->create(nb.y, ny, "fitpod:nb_y"); memory->create(nb.alist, na, "fitpod:nb_alist"); memory->create(nb.pairnum, natom_max, "fitpod:nb_pairnum"); memory->create(nb.pairnum_cumsum, natom_max+1, "fitpod:nb_pairnum_cumsum"); memory->create(nb.pairlist, np, "fitpod:nb_pairlist"); nb.natom_max = natom_max; nb.sze = nelements*nelements; nb.sza = na; nb.szy = ny; nb.szp = np; utils::logmesg(lmp,"**************** Begin of Memory Allocation ****************\n"); int szd = 0, szi=0, szsnap=0; for (int ci=0; ci<(int) data.num_atom.size(); ci++) { int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; double *x = &data.position[dim*natom_cumsum]; double *lattice = &data.lattice[9*ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, x, a1, a2, a3, rcut, pbc, natom); int ns2 = pdegree2[0]*nbesselpars + pdegree2[1]; int ns3 = pdegree3[0]*nbesselpars + pdegree3[1]; int szd1 = 3*Nij+ (1+dim)*Nij*MAX(nrbf2+ns2,nrbf3+ns3) + (nabf3+1)*7; int szi1 = 6*Nij + 2*natom+1 + (Nj-1)*Nj; szd = MAX(szd, szd1); szi = MAX(szi, szi1); if (podptr->sna.twojmax>0) { szd1 = 0; szd1 += Nij*dim; // rij szd1 += MAX(2*podptr->sna.idxu_max*Nij, 2*podptr->sna.idxz_max*podptr->sna.ndoubles*natom); // (Ur, Ui) and (Zr, Zi) szd1 += 2*podptr->sna.idxu_max*dim*Nij; // dUr, dUi szd1 += MAX(podptr->sna.idxb_max*podptr->sna.ntriples*dim*Nij, 2*podptr->sna.idxu_max*podptr->sna.nelements*natom); // dblist and (Utotr, Utoti) szsnap = MAX(szsnap, szd1); } } szd = MAX(szsnap, szd); szd = MAX(natom_max*(nd1+nd2+nd3+nd4) + szd, dim*natom_max*(nd-nd1-nd2-nd3-nd4)); szd = dim*natom_max*(nd1+nd2+nd3+nd4) + szd; // gdd includes linear descriptors derivatives, quadratic descriptors derivatives and temporary memory memory->create(desc.gdd, szd, "fitpod:desc_gdd"); memory->create(desc.tmpint, szi, "fitpod:desc_tmpint"); desc.szd = szd; desc.szi = szi; utils::logmesg(lmp, "maximum number of atoms in periodic domain: {}\n", natom_max); utils::logmesg(lmp, "maximum number of atoms in extended domain: {}\n", nb.sza); utils::logmesg(lmp, "maximum number of neighbors in extended domain: {}\n", nb.szp); utils::logmesg(lmp, "size of double memory: {}\n", szd); utils::logmesg(lmp, "size of int memory: {}\n", szi); utils::logmesg(lmp, "size of descriptor matrix: {} x {}\n", nd, nd); utils::logmesg(lmp, "**************** End of Memory Allocation ****************\n"); } void CFITPOD::linear_descriptors(datastruct data, int ci) { int dim = 3; int nd1 = podptr->pod.nd1; int nd2 = podptr->pod.nd2; int nd3 = podptr->pod.nd3; int nd4 = podptr->pod.nd4; int nd1234 = nd1+nd2+nd3+nd4; int *pbc = podptr->pod.pbc; double rcut = podptr->pod.rcut; int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int *atomtype = &data.atomtype[natom_cumsum]; double *position = &data.position[dim*natom_cumsum]; double *lattice = &data.lattice[9*ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; // neighbor list int Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, position, a1, a2, a3, rcut, pbc, natom); int *tmpint = &desc.tmpint[0]; double *tmpmem = &desc.gdd[dim*natom*nd1234+natom*nd1234]; podptr->linear_descriptors(desc.gd, desc.gdd, nb.y, tmpmem, atomtype, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, tmpint, natom, Nij); } void CFITPOD::quadratic_descriptors(datastruct data, int ci) { int dim = 3; int natom = data.num_atom[ci]; int nd1 = podptr->pod.nd1; int nd2 = podptr->pod.nd2; int nd3 = podptr->pod.nd3; int nd4 = podptr->pod.nd4; int nd22 = podptr->pod.nd22; int nd23 = podptr->pod.nd23; int nd24 = podptr->pod.nd24; int nd33 = podptr->pod.nd33; int nd34 = podptr->pod.nd34; int nd44 = podptr->pod.nd44; int nd123 = nd1+nd2+nd3; int nd1234 = nd1+nd2+nd3+nd4; double *fatom2 = &desc.gdd[dim*natom*(nd1)]; double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)]; double *fatom4 = &desc.gdd[dim*natom*(nd123)]; // global descriptors for four-body quadratic22 potential if (nd22>0) { int nq2 = podptr->pod.quadratic22[0]*podptr->pod.nc2; podptr->quadratic_descriptors(&desc.gd[nd1234], &desc.gdd[dim*natom*nd1234], &desc.gd[nd1], fatom2, nq2, dim*natom); } // global descriptors for four-body quadratic23 potential if (nd23>0) { int nq2 = podptr->pod.quadratic23[0]*podptr->pod.nc2; int nq3 = podptr->pod.quadratic23[1]*podptr->pod.nc3; podptr->quadratic_descriptors(&desc.gd[nd1234+nd22], &desc.gdd[dim*natom*(nd1234+nd22)], &desc.gd[nd1], &desc.gd[nd1+nd2], fatom2, fatom3, nq2, nq3, dim*natom); } // global descriptors for five-body quadratic24 potential if (nd24>0) { int nq2 = podptr->pod.quadratic24[0]*podptr->pod.nc2; int nq4 = podptr->pod.quadratic24[1]*podptr->pod.nc4; podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23], &desc.gdd[dim*natom*(nd1234+nd22+nd23)], &desc.gd[nd1], &desc.gd[nd1+nd2+nd3], fatom2, fatom4, nq2, nq4, dim*natom); } // global descriptors for five-body quadratic33 potential if (nd33>0) { int nq3 = podptr->pod.quadratic33[0]*podptr->pod.nc3; podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24)], &desc.gd[nd1+nd2], fatom3, nq3, dim*natom); } // global descriptors for six-body quadratic34 potential if (nd34>0) { int nq3 = podptr->pod.quadratic34[0]*podptr->pod.nc3; int nq4 = podptr->pod.quadratic34[1]*podptr->pod.nc4; podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24+nd33], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24+nd33)], &desc.gd[nd1+nd2], &desc.gd[nd1+nd2+nd3], fatom3, fatom4, nq3, nq4, dim*natom); } // global descriptors for seven-body quadratic44 potential if (nd44>0) { int nq4 = podptr->pod.quadratic44[0]*podptr->pod.nc4; podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24+nd33+nd34], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24+nd33+nd34)], &desc.gd[nd1+nd2+nd3], fatom4, nq4, dim*natom); } // normalize quadratic descriptors for (int i=0; i<(nd22+nd23+nd24+nd33+nd34+nd44); i++) desc.gd[nd1234+i] = desc.gd[nd1234+i]/(natom); for (int i=0; ipod.nd1; int nd2 = podptr->pod.nd2; int nd3 = podptr->pod.nd3; int nd4 = podptr->pod.nd4; int nd22 = podptr->pod.nd22; int nd23 = podptr->pod.nd23; int nd24 = podptr->pod.nd24; int nd33 = podptr->pod.nd33; int nd34 = podptr->pod.nd34; int nd44 = podptr->pod.nd44; int nd234 = podptr->pod.nd234; int nd333 = podptr->pod.nd333; int nd444 = podptr->pod.nd444; int nd123 = nd1+nd2+nd3; int nd1234 = nd1+nd2+nd3+nd4; // global descriptors for seven-body cubic234 potential if (nd234>0) { int nq2 = podptr->pod.cubic234[0]*podptr->pod.nc2; int nq3 = podptr->pod.cubic234[1]*podptr->pod.nc3; int nq4 = podptr->pod.cubic234[2]*podptr->pod.nc4; int np3 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44; double *eatom2 = &desc.gd[nd1]; double *eatom3 = &desc.gd[nd1+nd2]; double *eatom4 = &desc.gd[nd123]; double *fatom2 = &desc.gdd[dim*natom*(nd1)]; double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)]; double *fatom4 = &desc.gdd[dim*natom*(nd123)]; podptr->cubic_descriptors(&desc.gd[np3], &desc.gdd[dim*natom*np3], eatom2, eatom3, eatom4, fatom2, fatom3, fatom4, nq2, nq3, nq4, dim*natom); } // global descriptors for seven-body cubic333 potential if (nd333>0) { int nq3 = podptr->pod.cubic333[0]*podptr->pod.nc3; int np3 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234; double *eatom3 = &desc.gd[nd1+nd2]; double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)]; podptr->cubic_descriptors(&desc.gd[np3], &desc.gdd[dim*natom*np3], eatom3, fatom3, nq3, dim*natom); } // global descriptors for ten-body cubic444 potential if (nd444>0) { int nq4 = podptr->pod.cubic444[0]*podptr->pod.nc4; int np4 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333; double *eatom4 = &desc.gd[nd123]; double *fatom4 = &desc.gdd[dim*natom*(nd123)]; podptr->cubic_descriptors(&desc.gd[np4], &desc.gdd[dim*natom*(np4)], eatom4, fatom4, nq4, dim*natom); } // normalize cubic descriptors int nd = podptr->pod.nd; for (int i=(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); ipod.nd; int nforce = dim*natom; // compute energy weight and force weight double normconst = 1.0; if (data.normalizeenergy==1) normconst = 1.0/natom; double we = data.fitting_weights[0]; double wf = data.fitting_weights[1]; double we2 = (we*we)*(normconst*normconst); double wf2 = (wf*wf); // get energy and force from the training data set double energy = data.energy[ci]; double *force = &data.force[dim*natom_cumsum]; // least-square matrix for all descriptors: A = A + (we*we)*(gd^T * gd) podptr->podKron(desc.A, desc.gd, desc.gd, we2, nd, nd); // least-square matrix for all descriptors derivatives: A = A + (wf*wf) * (gdd^T * gdd) char cht = 'T'; char chn = 'N'; double one = 1.0; int inc1 = 1; DGEMM(&cht, &chn, &nd, &nd, &nforce, &wf2, desc.gdd, &nforce, desc.gdd, &nforce, &one, desc.A, &nd); // least-square vector for all descriptors: b = b + (we*we*energy)*gd double wee = we2*energy; for (int i = 0; i< nd; i++) desc.b[i] += wee*desc.gd[i]; // least-square vector for all descriptors derivatives: b = b + (wf*wf) * (gdd^T * f) DGEMV(&cht, &nforce, &nd, &wf2, desc.gdd, &nforce, force, &inc1, &one, desc.b, &inc1); } void CFITPOD::least_squares_fit(datastruct data) { utils::logmesg(lmp, "**************** Begin of Least-Squares Fitting ****************\n"); // loop over each configuration in the training data set for (int ci=0; ci < (int) data.num_atom.size(); ci++) { if ((ci % 100)==0) utils::logmesg(lmp, "Configuration: # {}\n", ci+1); // compute linear POD descriptors linear_descriptors(data, ci); // compute quadratic POD descriptors quadratic_descriptors(data, ci); // compute cubic POD descriptors cubic_descriptors(data, ci); // assemble the least-squares linear system least_squares_matrix(data, ci); } int nd = podptr->pod.nd; for (int i = 0; iprint_matrix( "Least-squares coefficient vector:", 1, nd, desc.c, 1); // save coefficients into a text file FILE *fp = fopen("coefficients.txt", "w"); fmt::print(fp, "POD_coefficients: {}\n", nd); for (int count = 0; count < nd; count++){ fmt::print(fp, "{:.20}\n", desc.c[count]); } fclose(fp); utils::logmesg(lmp, "**************** End of Least-Squares Fitting ****************\n"); } double CFITPOD::energyforce_calculation(double *force, double *coeff, datastruct data, int ci) { int dim = 3; int *pbc = podptr->pod.pbc; double rcut = podptr->pod.rcut; int nd1234 = podptr->pod.nd1 + podptr->pod.nd2 + podptr->pod.nd3 + podptr->pod.nd4; int natom = data.num_atom[ci]; int natom_cumsum2 = data.num_atom_cumsum[ci]; int *atomtype = &data.atomtype[natom_cumsum2]; double *position = &data.position[dim*natom_cumsum2]; double *lattice = &data.lattice[9*ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; // neighbor list int Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, position, a1, a2, a3, rcut, pbc, natom); double *tmpmem = &desc.gdd[0]; int *tmpint = &desc.tmpint[0]; double *rij = &tmpmem[0]; // 3*Nij int *ai = &tmpint[0]; // Nij int *aj = &tmpint[Nij]; // Nij int *ti = &tmpint[2*Nij]; // Nij int *tj = &tmpint[3*Nij]; // Nij int *idxi = &tmpint[4*Nij]; // Nij podptr->podNeighPairs(rij, nb.y, idxi, ai, aj, ti, tj, nb.pairnum_cumsum, atomtype, nb.pairlist, nb.alist, natom); double *effectivecoeff = &tmpmem[3*Nij]; // 3*Nij podptr->podArraySetValue(effectivecoeff, 0.0, nd1234); double energy = podptr->energyforce_calculation(force, coeff, effectivecoeff, desc.gd, rij, &tmpmem[3*Nij+nd1234], nb.pairnum_cumsum, atomtype, idxi, ai, aj, ti, tj, natom, Nij); return energy; } void CFITPOD::print_analysis(datastruct data, double *outarray, double *errors) { std::string s = "All files"; int nfiles = data.data_files.size(); // number of files int lm = s.size(); for (int i = 0; i < nfiles; i++) lm = MAX(lm, (int) data.filenames[i].size()); lm = lm + 2; std::string filename_errors = data.training ? "training_errors.txt" : "test_errors.txt"; std::string filename_analysis = data.training ? "training_analysis.txt" : "test_analysis.txt"; FILE *fp_errors = fopen(filename_errors.c_str(), "w"); FILE *fp_analysis = fopen(filename_errors.c_str(), "w"); std::string sa = "**************** Begin of Error Analysis for the Training Data Set ****************"; std::string sb = "**************** Begin of Error Analysis for the Test Data Set ****************"; std::string mystr = (data.training) ? sa : sb; utils::logmesg(lmp, "{}\n", mystr); fmt::print(fp_errors, mystr + "\n"); sa = "----------------------------------------------------------------------------------------\n"; sb = " File | # configs | # atoms | MAE energy | RMSE energy | MAE force | RMSE force |\n"; utils::logmesg(lmp, "{}", sa); utils::logmesg(lmp, "{}", sb); utils::logmesg(lmp, "{}", sa); fmt::print(fp_errors, sa); fmt::print(fp_errors, sb); fmt::print(fp_errors, sa); int ci=0, m=8, nc=0, nf=0; for (int file = 0; file < nfiles; file++) { fmt::print(fp_analysis, "# {}\n", data.filenames[file]); sb = "| config | # atoms | energy | DFT energy | energy error | force | DFT force | force error |\n"; fmt::print(fp_analysis, sb); int nforceall = 0; int nconfigs = data.num_config[file]; nc += nconfigs; for (int ii=0; ii < nconfigs; ii++) { // loop over each configuration in a file fmt::print(fp_analysis, " "); for(int count = 0; count < m; count ++) fmt::print(fp_analysis, "{} ", outarray[count + m*ci]); fmt::print(fp_analysis, "\n"); nforceall += 3*data.num_atom[ci]; ci += 1; } nf += nforceall; int q = file+1; std::string s = data.filenames[file]; s = s + std::string(lm-s.size(), ' '); std::string s1 = std::to_string(nconfigs); s1 = s1 + std::string(MAX(6- (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(nforceall/3); s1 = s1 + std::string(MAX(7 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[0 + 4*q]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[1 + 4*q]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[2 + 4*q]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[3 + 4*q]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1 + "\n"; utils::logmesg(lmp, "{}", s); fmt::print(fp_errors, "{}", s); } utils::logmesg(lmp, "{}", sa); fmt::print(fp_errors, "{}", sa); s = s + std::string(MAX(lm - (int) s.size(),1), ' '); std::string s1 = std::to_string(nc); s1 = s1 + std::string(MAX(6- (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(nf/3); s1 = s1 + std::string(MAX(7 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[0]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[1]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[2]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1; s1 = std::to_string(errors[3]); s1 = s1 + std::string(MAX(10 - (int) s1.size(),1), ' '); s = s + " " + s1 + "\n"; utils::logmesg(lmp, "{}", s); utils::logmesg(lmp, "{}", sa); fmt::print(fp_errors, "{}", s); fmt::print(fp_errors, "{}", sa); sa = "**************** End of Error Analysis for the Training Data Set ****************"; sb = "**************** End of Error Analysis for the Test Data Set ****************"; mystr = (data.training) ? sa : sb; utils::logmesg(lmp, "{}\n", mystr); fmt::print(fp_errors, "{}\n", mystr); fclose(fp_errors); fclose(fp_analysis); } void CFITPOD::error_analysis(datastruct data, double *coeff) { int dim = 3; double energy; double force[dim*data.num_atom_max]; int nfiles = data.data_files.size(); // number of files int num_configs = data.num_atom.size(); // number of configurations in all files //int nd12 = podptr->pod.nd1 + podptr->pod.nd2; //int nd123 = podptr->pod.nd1 + podptr->pod.nd2 + podptr->pod.nd3; //int nd1234 = podptr->pod.nd1 + podptr->pod.nd2 + podptr->pod.nd3 + podptr->pod.nd4; //double effectivecoeff[nd1234]; int m = 8; double outarray[m*num_configs]; double errors[4*(nfiles+1)]; for (int i=0; i<4*(nfiles+1); i++) errors[i] = 0.0; int nd1 = podptr->pod.nd1; int nd2 = podptr->pod.nd2; int nd3 = podptr->pod.nd3; int nd4 = podptr->pod.nd4; int nd22 = podptr->pod.nd22; int nd23 = podptr->pod.nd23; int nd24 = podptr->pod.nd24; int nd33 = podptr->pod.nd33; int nd34 = podptr->pod.nd34; int nd44 = podptr->pod.nd44; int nd1234 = nd1+nd2+nd3+nd4; int nd = podptr->pod.nd; double newcoeff[nd]; for (int j=0; jpodArrayNorm(force, nforce); outarray[6 + m*ci] = podptr->podArrayNorm(DFTforce, nforce); double diff, sum = 0.0, ssr = 0.0; for (int j=0; j( &force[0] ), sizeof(double) * (1 + nforce), 1, fp); fclose(fp); } } utils::logmesg(lmp, "**************** End of Energy/Force Calculation ****************\n"); }