diff --git a/src/ML-POD/fitpod_command.cpp b/src/ML-POD/fitpod_command.cpp index ee9d64bab5..ba7ffeba3a 100644 --- a/src/ML-POD/fitpod_command.cpp +++ b/src/ML-POD/fitpod_command.cpp @@ -15,7 +15,6 @@ Contributing authors: Ngoc Cuong Nguyen (MIT) and Andrew Rohskopf (SNL) ------------------------------------------------------------------------- */ - #include "fitpod_command.h" #include "comm.h" @@ -28,8 +27,8 @@ #include #include #include -#include #include +#include #include "eapod.h" @@ -39,20 +38,79 @@ using MathSpecial::powint; static constexpr int MAXLINE = 1024; static constexpr double SMALL = 1.0e-10; +FitPOD::datastruct::datastruct() : + file_format("extxyz"), file_extension("xyz"), filenametag("pod"), group_weight_type("global"), + lattice(nullptr), energy(nullptr), stress(nullptr), position(nullptr), force(nullptr), + atomtype(nullptr), we(nullptr), wf(nullptr), + fitting_weights{100.0, 1.0, 0.0, 1, 1, 0, 0, 1, 1, 1, 1, 1e-10} +{ + training = 1; + normalizeenergy = 1; + training_analysis = 1; + test_analysis = 1; + training_calculation = 0; + test_calculation = 0; + randomize = 1; + precision = 8; + fraction = 1.0; +} + +void FitPOD::datastruct::copydatainfo(datastruct &data) const +{ + data.data_path = data_path; + data.file_format = file_format; + data.file_extension = file_extension; + data.data_files = data_files; + data.filenametag = filenametag; + data.filenames = filenames; + data.training_analysis = training_analysis; + data.test_analysis = test_analysis; + data.training_calculation = training_calculation; + data.test_calculation = test_calculation; + data.fraction = fraction; + data.randomize = randomize; + data.precision = precision; + data.training = training; + data.normalizeenergy = normalizeenergy; + for (int i = 0; i < 12; i++) data.fitting_weights[i] = fitting_weights[i]; + data.we_map = we_map; + data.wf_map = wf_map; +} + +FitPOD::neighborstruct::neighborstruct() : + alist(nullptr), pairnum(nullptr), pairnum_cumsum(nullptr), pairlist(nullptr), y(nullptr) +{ + natom_max = 0; + sze = 0; + sza = 0; + szy = 0; + szp = 0; +} + +FitPOD::descriptorstruct::descriptorstruct() : + bd(nullptr), pd(nullptr), gd(nullptr), gdd(nullptr), A(nullptr), b(nullptr), c(nullptr) +{ + szd = 0; + nCoeffAll = 0; + nClusters = 0; +} + FitPOD::FitPOD(LAMMPS *_lmp) : Command(_lmp), fastpodptr(nullptr) { + save_descriptors = 0; + compute_descriptors = 0; } void FitPOD::command(int narg, char **arg) { if (narg < 2) utils::missing_cmd_args(FLERR, "fitpod", error); - std::string pod_file = std::string(arg[0]); // pod input file - std::string data_file = std::string(arg[1]); // data input file - std::string coeff_file, proj_file, cent_file; // coefficient input files + std::string pod_file = std::string(arg[0]); // pod input file + std::string data_file = std::string(arg[1]); // data input file + std::string coeff_file, proj_file, cent_file; // coefficient input files if (narg > 2) - coeff_file = std::string(arg[2]); // coefficient input file + coeff_file = std::string(arg[2]); // coefficient input file else coeff_file = ""; @@ -62,9 +120,13 @@ void FitPOD::command(int narg, char **arg) desc.nClusters = fastpodptr->nClusters; read_data_files(data_file, fastpodptr->species); - estimate_memory_neighborstruct(traindata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements); - estimate_memory_neighborstruct(testdata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements); - if (desc.nClusters > 1) estimate_memory_neighborstruct(envdata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements); + estimate_memory_neighborstruct(traindata, fastpodptr->pbc, fastpodptr->rcut, + fastpodptr->nelements); + estimate_memory_neighborstruct(testdata, fastpodptr->pbc, fastpodptr->rcut, + fastpodptr->nelements); + if (desc.nClusters > 1) + estimate_memory_neighborstruct(envdata, fastpodptr->pbc, fastpodptr->rcut, + fastpodptr->nelements); allocate_memory_neighborstruct(); estimate_memory_fastpod(traindata); estimate_memory_fastpod(testdata); @@ -74,42 +136,42 @@ void FitPOD::command(int narg, char **arg) if (((int) envdata.data_path.size() > 1) && (desc.nClusters > 1)) { environment_cluster_calculation(envdata); - memory->destroy(envdata.lattice); - memory->destroy(envdata.energy); - memory->destroy(envdata.stress); - memory->destroy(envdata.position); - memory->destroy(envdata.force); - memory->destroy(envdata.atomtype); - memory->destroy(envdata.we); - memory->destroy(envdata.wf); + memory->destroy(envdata.lattice); + memory->destroy(envdata.energy); + memory->destroy(envdata.stress); + memory->destroy(envdata.position); + memory->destroy(envdata.force); + memory->destroy(envdata.atomtype); + memory->destroy(envdata.we); + memory->destroy(envdata.wf); } - if (compute_descriptors==0) { + if (compute_descriptors == 0) { // compute POD coefficients using least-squares method if (coeff_file == "") { least_squares_fit(traindata); - if (comm->me == 0) { // save coefficients into a text file - std::string filename = traindata.filenametag + "_coefficients" + ".pod"; + if (comm->me == 0) { // save coefficients into a text file + std::string filename = traindata.filenametag + "_coefficients" + ".pod"; FILE *fp = fopen(filename.c_str(), "w"); int nCoeffAll = desc.nCoeffAll; - int n1 = 0, n2=0; + int n1 = 0, n2 = 0; if (((int) envdata.data_path.size() > 1) && (desc.nClusters > 1)) { - n1 = fastpodptr->nComponents*fastpodptr->Mdesc*fastpodptr->nelements; - n2 = fastpodptr->nComponents*fastpodptr->nClusters*fastpodptr->nelements; + n1 = fastpodptr->nComponents * fastpodptr->Mdesc * fastpodptr->nelements; + n2 = fastpodptr->nComponents * fastpodptr->nClusters * fastpodptr->nelements; } fmt::print(fp, "model_coefficients: {} {} {}\n", nCoeffAll, n1, n2); for (int count = 0; count < nCoeffAll; count++) { - fmt::print(fp, "{:<10.{}f}\n", desc.c[count], traindata.precision); + fmt::print(fp, "{:<10.{}f}\n", desc.c[count], traindata.precision); } for (int count = 0; count < n1; count++) { - fmt::print(fp, "{:<10.{}f}\n", fastpodptr->Proj[count], 14); + fmt::print(fp, "{:<10.{}f}\n", fastpodptr->Proj[count], 14); } for (int count = 0; count < n2; count++) { - fmt::print(fp, "{:<10.{}f}\n", fastpodptr->Centroids[count], 14); + fmt::print(fp, "{:<10.{}f}\n", fastpodptr->Centroids[count], 14); } fclose(fp); } @@ -117,32 +179,52 @@ void FitPOD::command(int narg, char **arg) // calculate errors for the training data set - if ((traindata.training_analysis) && ((int) traindata.data_path.size() > 1) ) + if ((traindata.training_analysis) && ((int) traindata.data_path.size() > 1)) error_analysis(traindata, desc.c); //error->all(FLERR, "stop after error_analysis"); // calculate energy and force for the training data set - if ((traindata.training_calculation) && ((int) traindata.data_path.size() > 1) ) + if ((traindata.training_calculation) && ((int) traindata.data_path.size() > 1)) energyforce_calculation(traindata); - if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && (traindata.fraction == 1.0))) - { + if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && + (traindata.fraction == 1.0))) { // calculate errors for the test data set - if ((testdata.test_analysis) && ((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) ) { + if ((testdata.test_analysis) && ((int) testdata.data_path.size() > 1) && + (testdata.fraction > 0)) { error_analysis(testdata, desc.c); } // calculate energy and force for the test data set - if ((testdata.test_analysis) && (testdata.test_calculation) && ((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) ) + if ((testdata.test_analysis) && (testdata.test_calculation) && + ((int) testdata.data_path.size() > 1) && (testdata.fraction > 0)) energyforce_calculation(testdata); // deallocate testing data - if ((int) testdata.data_path.size() > 1 && (testdata.test_analysis) && (testdata.fraction > 0) ){ + if ((int) testdata.data_path.size() > 1 && (testdata.test_analysis) && + (testdata.fraction > 0)) { + memory->destroy(testdata.lattice); + memory->destroy(testdata.energy); + memory->destroy(testdata.stress); + memory->destroy(testdata.position); + memory->destroy(testdata.force); + memory->destroy(testdata.atomtype); + memory->destroy(testdata.we); + memory->destroy(testdata.wf); + } + } + } else if (compute_descriptors > 0) { + // compute and save POD descriptors + descriptors_calculation(traindata); + + if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0))) { + if ((int) testdata.data_path.size() > 1) { + descriptors_calculation(testdata); memory->destroy(testdata.lattice); memory->destroy(testdata.energy); memory->destroy(testdata.stress); @@ -154,29 +236,10 @@ void FitPOD::command(int narg, char **arg) } } } - else if (compute_descriptors>0) { - // compute and save POD descriptors - descriptors_calculation(traindata); - - if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0))) - { - if ((int) testdata.data_path.size() > 1){ - descriptors_calculation(testdata); - memory->destroy(testdata.lattice); - memory->destroy(testdata.energy); - memory->destroy(testdata.stress); - memory->destroy(testdata.position); - memory->destroy(testdata.force); - memory->destroy(testdata.atomtype); - memory->destroy(testdata.we); - memory->destroy(testdata.wf); - } - } - } // deallocate training data - if ((int) traindata.data_path.size() > 1){ + if ((int) traindata.data_path.size() > 1) { memory->destroy(traindata.lattice); memory->destroy(traindata.energy); memory->destroy(traindata.stress); @@ -208,11 +271,12 @@ void FitPOD::command(int narg, char **arg) } int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, - std::string &file_extension, std::string &env_path, std::string &test_path, - std::string &training_path, std::string &filenametag, - const std::string &data_file, std::string &group_weight_type, - std::unordered_map &we_map, - std::unordered_map &wf_map) + std::string &file_extension, std::string &env_path, + std::string &test_path, std::string &training_path, + std::string &filenametag, const std::string &data_file, + std::string &group_weight_type, + std::unordered_map &we_map, + std::unordered_map &wf_map) { int precision = 8; @@ -220,33 +284,33 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, FILE *fpdata; if (comm->me == 0) { - fpdata = utils::open_potential(datafilename,lmp,nullptr); + fpdata = utils::open_potential(datafilename, lmp, nullptr); if (fpdata == nullptr) - error->one(FLERR,"Cannot open training data file {}: ", datafilename, utils::getsyserror()); + error->one(FLERR, "Cannot open training data file {}: ", datafilename, utils::getsyserror()); } // loop through lines of training data file and parse keywords - char line[MAXLINE],*ptr; + char line[MAXLINE], *ptr; int eof = 0; while (true) { if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpdata); + ptr = fgets(line, MAXLINE, fpdata); if (ptr == nullptr) { eof = 1; fclose(fpdata); } } - MPI_Bcast(&eof,1,MPI_INT,0,world); + MPI_Bcast(&eof, 1, MPI_INT, 0, world); if (eof) break; - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + MPI_Bcast(line, MAXLINE, MPI_CHAR, 0, world); // words = ptrs to all words in line // strip single and double quotes from words std::vector words; try { - words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + words = Tokenizer(utils::trim_comment(line), "\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } @@ -255,26 +319,40 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, auto keywd = words[0]; - if (words.size() != 2) - error->one(FLERR,"Improper POD data file.", utils::getsyserror()); + if (words.size() != 2) error->one(FLERR, "Improper POD data file.", utils::getsyserror()); // settings for fitting weights - if (keywd == "fitting_weight_energy") fitting_weights[0] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fitting_weight_force") fitting_weights[1] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fitting_weight_stress") fitting_weights[2] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "error_analysis_for_training_data_set") fitting_weights[3] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "error_analysis_for_test_data_set") fitting_weights[4] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "energy_force_calculation_for_training_data_set") fitting_weights[5] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "energy_force_calculation_for_test_data_set") fitting_weights[6] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fraction_training_data_set") fitting_weights[7] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fraction_test_data_set") fitting_weights[8] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "randomize_training_data_set") fitting_weights[9] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "randomize_test_data_set") fitting_weights[10] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fitting_regularization_parameter") fitting_weights[11] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "precision_for_pod_coefficients") precision = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "save_pod_descriptors") save_descriptors = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "compute_pod_descriptors") compute_descriptors = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "fitting_weight_energy") + fitting_weights[0] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "fitting_weight_force") + fitting_weights[1] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "fitting_weight_stress") + fitting_weights[2] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "error_analysis_for_training_data_set") + fitting_weights[3] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "error_analysis_for_test_data_set") + fitting_weights[4] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "energy_force_calculation_for_training_data_set") + fitting_weights[5] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "energy_force_calculation_for_test_data_set") + fitting_weights[6] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "fraction_training_data_set") + fitting_weights[7] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "fraction_test_data_set") + fitting_weights[8] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "randomize_training_data_set") + fitting_weights[9] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "randomize_test_data_set") + fitting_weights[10] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "fitting_regularization_parameter") + fitting_weights[11] = utils::numeric(FLERR, words[1], false, lmp); + if (keywd == "precision_for_pod_coefficients") + precision = utils::inumeric(FLERR, words[1], false, lmp); + if (keywd == "save_pod_descriptors") + save_descriptors = utils::inumeric(FLERR, words[1], false, lmp); + if (keywd == "compute_pod_descriptors") + compute_descriptors = utils::inumeric(FLERR, words[1], false, lmp); // other settings @@ -287,29 +365,29 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, // group weight table if (keywd == "group_weights") group_weight_type = words[1]; - if (std::strcmp(group_weight_type.c_str(), "table") == 0){ + if (std::strcmp(group_weight_type.c_str(), "table") == 0) { // Read the table as a hash map. // Get next line. if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpdata); + ptr = fgets(line, MAXLINE, fpdata); if (ptr == nullptr) { eof = 1; fclose(fpdata); } } - MPI_Bcast(&eof,1,MPI_INT,0,world); + MPI_Bcast(&eof, 1, MPI_INT, 0, world); if (eof) break; - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + MPI_Bcast(line, MAXLINE, MPI_CHAR, 0, world); // Tokenize. try { - words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + words = Tokenizer(utils::trim_comment(line), "\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } int numwords = words.size(); // Loop over group table entries. - while (numwords == 3){ + while (numwords == 3) { // Insert in map. we_map[words[0]] = utils::numeric(FLERR, words[1], false, lmp); @@ -317,24 +395,23 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, // Get next line. if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpdata); + ptr = fgets(line, MAXLINE, fpdata); if (ptr == nullptr) { eof = 1; fclose(fpdata); } } - MPI_Bcast(&eof,1,MPI_INT,0,world); + MPI_Bcast(&eof, 1, MPI_INT, 0, world); if (eof) break; - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + MPI_Bcast(line, MAXLINE, MPI_CHAR, 0, world); // Tokenize. try { - words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + words = Tokenizer(utils::trim_comment(line), "\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } numwords = words.size(); } - } } @@ -365,7 +442,7 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, return precision; } -void FitPOD::get_exyz_files(std::vector& files, std::vector &group_names, +void FitPOD::get_exyz_files(std::vector &files, std::vector &group_names, const std::string &datapath, const std::string &extension) { auto allfiles = platform::list_directory(datapath); @@ -374,24 +451,23 @@ void FitPOD::get_exyz_files(std::vector& files, std::vector& num_atom, int& num_atom_sum, std::string file) +int FitPOD::get_number_atom_exyz(std::vector &num_atom, int &num_atom_sum, std::string file) { std::string filename = std::move(file); FILE *fp; if (comm->me == 0) { - fp = utils::open_potential(filename,lmp,nullptr); + fp = utils::open_potential(filename, lmp, nullptr); if (fp == nullptr) - error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); + error->one(FLERR, "Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); } - char line[MAXLINE],*ptr; + char line[MAXLINE], *ptr; int eof = 0; int num_configs = 0; num_atom_sum = 0; @@ -400,22 +476,22 @@ int FitPOD::get_number_atom_exyz(std::vector& num_atom, int& num_atom_sum, while (true) { if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fp); + ptr = fgets(line, MAXLINE, fp); if (ptr == nullptr) { eof = 1; fclose(fp); } } - MPI_Bcast(&eof,1,MPI_INT,0,world); + MPI_Bcast(&eof, 1, MPI_INT, 0, world); if (eof) break; - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + MPI_Bcast(line, MAXLINE, MPI_CHAR, 0, world); // words = ptrs to all words in line // strip single and double quotes from words std::vector words; try { - words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + words = Tokenizer(utils::trim_comment(line), "\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } @@ -424,7 +500,7 @@ int FitPOD::get_number_atom_exyz(std::vector& num_atom, int& num_atom_sum, int natom; if (words.size() == 1) { - natom = utils::inumeric(FLERR,words[0],false,lmp); + natom = utils::inumeric(FLERR, words[0], false, lmp); num_atom.push_back(natom); num_configs += 1; num_atom_sum += natom; @@ -433,37 +509,38 @@ int FitPOD::get_number_atom_exyz(std::vector& num_atom, int& num_atom_sum, return num_configs; } -int FitPOD::get_number_atoms(std::vector& num_atom, std::vector &num_atom_sum, std::vector& num_config, std::vector training_files) +int FitPOD::get_number_atoms(std::vector &num_atom, std::vector &num_atom_sum, + std::vector &num_config, std::vector training_files) { - int nfiles = training_files.size(); // number of files + int nfiles = training_files.size(); // number of files int d, n; - for (int i=0; i species, double we_group, double wf_group) +void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, double *we, double *wf, + double *pos, double *forces, int *atomtype, std::string file, + std::vector species, double we_group, double wf_group) { std::string filename = std::move(file); FILE *fp; if (comm->me == 0) { - fp = utils::open_potential(filename,lmp,nullptr); + fp = utils::open_potential(filename, lmp, nullptr); if (fp == nullptr) - error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); + error->one(FLERR, "Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); } - char line[MAXLINE],*ptr; + char line[MAXLINE], *ptr; int eof = 0; int cfi = 0; int nat = 0; @@ -473,34 +550,36 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou while (true) { if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fp); + ptr = fgets(line, MAXLINE, fp); if (ptr == nullptr) { eof = 1; fclose(fp); } } - MPI_Bcast(&eof,1,MPI_INT,0,world); + MPI_Bcast(&eof, 1, MPI_INT, 0, world); if (eof) break; - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + MPI_Bcast(line, MAXLINE, MPI_CHAR, 0, world); // words = ptrs to all words in line // strip single and double quotes from words std::vector words; try { - words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + words = Tokenizer(utils::trim_comment(line), "\"' \t\n\r\f").as_vector(); } catch (TokenizerException &) { // ignore } if (words.size() == 0) continue; - ValueTokenizer text(utils::trim_comment(line),"\"' \t\n\r\f"); + ValueTokenizer text(utils::trim_comment(line), "\"' \t\n\r\f"); if (text.contains("attice")) { // find the word containing "lattice" - auto it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("attice") != std::string::npos; }); + auto it = std::find_if(words.begin(), words.end(), [](const std::string &str) { + return str.find("attice") != std::string::npos; + }); // get index of element from iterator @@ -511,14 +590,14 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou // lattice numbers start at index + 1 for (int k = 0; k < 9; k++) { - lattice[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp); + lattice[k + 9 * cfi] = utils::numeric(FLERR, words[index + 1 + k], false, lmp); } } else { // lattice numbers start at index + 2 for (int k = 0; k < 9; k++) { - lattice[k + 9*cfi] = utils::numeric(FLERR,words[index+2+k],false,lmp); + lattice[k + 9 * cfi] = utils::numeric(FLERR, words[index + 2 + k], false, lmp); } } @@ -526,7 +605,9 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou // find the word containing "energy" - it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("nergy") != std::string::npos; }); + it = std::find_if(words.begin(), words.end(), [](const std::string &str) { + return str.find("nergy") != std::string::npos; + }); // get index of element from iterator @@ -537,17 +618,19 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou // energy is after "=" inside this string std::size_t found = words[index].find("="); - energy[cfi] = utils::numeric(FLERR,words[index].substr(found+1),false,lmp); + energy[cfi] = utils::numeric(FLERR, words[index].substr(found + 1), false, lmp); } else { // energy is at index + 2 - energy[cfi] = utils::numeric(FLERR,words[index+2],false,lmp); + energy[cfi] = utils::numeric(FLERR, words[index + 2], false, lmp); } // find the word containing "stress" - it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("tress") != std::string::npos; }); + it = std::find_if(words.begin(), words.end(), [](const std::string &str) { + return str.find("tress") != std::string::npos; + }); // get index of element from iterator @@ -559,14 +642,14 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou // stress numbers start at index + 1 for (int k = 0; k < 9; k++) { - stress[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp); + stress[k + 9 * cfi] = utils::numeric(FLERR, words[index + 1 + k], false, lmp); } } else { // lattice numbers start at index + 2 for (int k = 0; k < 9; k++) { - stress[k + 9*cfi] = utils::numeric(FLERR,words[index+2+k],false,lmp); + stress[k + 9 * cfi] = utils::numeric(FLERR, words[index + 2 + k], false, lmp); } } } @@ -585,17 +668,15 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou else if (words.size() > 1) { for (int ii = 0; ii < ns; ii++) - if (species[ii] == words[0]) - atomtype[nat] = ii+1; + if (species[ii] == words[0]) atomtype[nat] = ii + 1; - if (compute_descriptors> 0) { + if (compute_descriptors > 0) { for (int k = 0; k < 3; k++) - pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); - } - else { + pos[k + 3 * nat] = utils::numeric(FLERR, words[1 + k], false, lmp); + } else { for (int k = 0; k < 6; k++) { - if (k <= 2) pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); - if (k > 2 ) forces[k-3 + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); + if (k <= 2) pos[k + 3 * nat] = utils::numeric(FLERR, words[1 + k], false, lmp); + if (k > 2) forces[k - 3 + 3 * nat] = utils::numeric(FLERR, words[1 + k], false, lmp); } } @@ -607,22 +688,23 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou void FitPOD::get_data(datastruct &data, const std::vector &species) { get_exyz_files(data.data_files, data.group_names, data.data_path, data.file_extension); - data.num_atom_sum = get_number_atoms(data.num_atom, data.num_atom_each_file, data.num_config, data.data_files); + data.num_atom_sum = + get_number_atoms(data.num_atom, data.num_atom_each_file, data.num_config, data.data_files); data.num_config_sum = data.num_atom.size(); size_t maxname = 9; - for (const auto &fname : data.data_files) maxname = MAX(maxname,fname.size()); - maxname -= data.data_path.size()+1; - const std::string sepline(maxname+46, '-'); + for (const auto &fname : data.data_files) maxname = MAX(maxname, fname.size()); + maxname -= data.data_path.size() + 1; + const std::string sepline(maxname + 46, '-'); if (comm->me == 0) - utils::logmesg(lmp, "{}\n {:^{}} | number of configurations | number of atoms\n{}\n", - sepline, "data file", maxname, sepline); + utils::logmesg(lmp, "{}\n {:^{}} | number of configurations | number of atoms\n{}\n", sepline, + "data file", maxname, sepline); int i = 0; for (const auto &fname : data.data_files) { - std::string filename = fname.substr(data.data_path.size()+1); + std::string filename = fname.substr(data.data_path.size() + 1); data.filenames.push_back(filename); if (comm->me == 0) - utils::logmesg(lmp, " {:<{}} | {:>10} | {:>8}\n", - filename, maxname, data.num_config[i], data.num_atom_each_file[i]); + utils::logmesg(lmp, " {:<{}} | {:>10} | {:>8}\n", filename, maxname, + data.num_config[i], data.num_atom_each_file[i]); ++i; } if (comm->me == 0) { @@ -633,31 +715,32 @@ void FitPOD::get_data(datastruct &data, const std::vector &species) } if (data.data_files.size() < 1) - error->all(FLERR, "Cannot fit potential without data files. The data paths may not be valid. Please check the data paths in the POD data file."); + error->all(FLERR, + "Cannot fit potential without data files. The data paths may not be valid. Please " + "check the data paths in the POD data file."); int n = data.num_config_sum; - memory->create(data.lattice, 9*n, "fitpod:lattice"); - memory->create(data.stress, 9*n, "fitpod:stress"); + memory->create(data.lattice, 9 * n, "fitpod:lattice"); + memory->create(data.stress, 9 * n, "fitpod:stress"); memory->create(data.energy, n, "fitpod:energy"); // Group weights have same size as energy. memory->create(data.we, n, "fitpod:we"); memory->create(data.wf, n, "fitpod:wf"); n = data.num_atom_sum; - memory->create(data.position, 3*n, "fitpod:position"); - memory->create(data.force, 3*n, "fitpod:force"); + memory->create(data.position, 3 * n, "fitpod:position"); + memory->create(data.force, 3 * n, "fitpod:force"); memory->create(data.atomtype, n, "fitpod:atomtype"); - double we_group, wf_group; // group weights - int nfiles = data.data_files.size(); // number of files + double we_group, wf_group; // group weights + int nfiles = data.data_files.size(); // number of files int nconfigs = 0; int natoms = 0; - for (int i=0; i &species) wf_group = data.fitting_weights[1]; } //utils::logmesg(lmp, "Read xyz file: {}\n", group_name); - read_exyz_file(&data.lattice[9*nconfigs], &data.stress[9*nconfigs], &data.energy[nconfigs], &data.we[nconfigs], &data.wf[nconfigs], - &data.position[3*natoms], &data.force[3*natoms], &data.atomtype[natoms], - data.data_files[i], species, we_group, wf_group); + read_exyz_file(&data.lattice[9 * nconfigs], &data.stress[9 * nconfigs], &data.energy[nconfigs], + &data.we[nconfigs], &data.wf[nconfigs], &data.position[3 * natoms], + &data.force[3 * natoms], &data.atomtype[natoms], data.data_files[i], species, + we_group, wf_group); nconfigs += data.num_config[i]; natoms += data.num_atom_each_file[i]; } @@ -675,22 +759,22 @@ void FitPOD::get_data(datastruct &data, const std::vector &species) int len = data.num_atom.size(); data.num_atom_min = podArrayMin(&data.num_atom[0], len); data.num_atom_max = podArrayMax(&data.num_atom[0], len); - data.num_atom_cumsum.resize(len+1); - podCumsum(&data.num_atom_cumsum[0], &data.num_atom[0], len+1); + data.num_atom_cumsum.resize(len + 1); + podCumsum(&data.num_atom_cumsum[0], &data.num_atom[0], len + 1); - data.num_config_cumsum.resize(nfiles+1); - podCumsum(&data.num_config_cumsum[0], &data.num_config[0], nfiles+1); + data.num_config_cumsum.resize(nfiles + 1); + podCumsum(&data.num_config_cumsum[0], &data.num_config[0], nfiles + 1); // convert all structures to triclinic system constexpr int DIM = 3; - double Qmat[DIM*DIM]; - for (int ci=0; ci FitPOD::linspace(int start_in, int end_in, int num_in) int elm; if (num == 0) { return linspaced; } - if (num == 1) - { + if (num == 1) { elm = (int) std::round(start); linspaced.push_back(elm); return linspaced; @@ -729,8 +812,7 @@ std::vector FitPOD::linspace(int start_in, int end_in, int num_in) double delta = (end - start) / (num - 1); - for(int i=0; i < num-1; ++i) - { + for (int i = 0; i < num - 1; ++i) { elm = (int) std::round(start + delta * i); linspaced.push_back(elm); } @@ -746,16 +828,14 @@ std::vector FitPOD::shuffle(int start_in, int end_in, int num_in) int sz = end_in - start_in + 1; std::vector myvector(sz); - for (int i = 0; i shuffle_vec(num_in); - for (int i = 0; i FitPOD::select(int n, double fraction, int randomize) { std::vector selected; - int m = (int) std::round(n*fraction); + int m = (int) std::round(n * fraction); m = MAX(m, 1); - selected = (randomize==1) ? shuffle(1, n, m) : linspace(1, n, m); + selected = (randomize == 1) ? shuffle(1, n, m) : linspace(1, n, m); return selected; } @@ -778,28 +858,30 @@ void FitPOD::select_data(datastruct &newdata, const datastruct &data) int randomize = data.randomize; if (comm->me == 0) { - if (randomize==1) - utils::logmesg(lmp, "Select {} fraction of the data set at random using shuffle\n", data.fraction); + if (randomize == 1) + utils::logmesg(lmp, "Select {} fraction of the data set at random using shuffle\n", + data.fraction); else - utils::logmesg(lmp, "Select {} fraction of the data set deterministically using linspace\n", data.fraction); + utils::logmesg(lmp, "Select {} fraction of the data set deterministically using linspace\n", + data.fraction); } - int nfiles = data.data_files.size(); // number of files + int nfiles = data.data_files.size(); // number of files std::vector> selected(nfiles); newdata.num_config.resize(nfiles); - newdata.num_config_cumsum.resize(nfiles+1); + newdata.num_config_cumsum.resize(nfiles + 1); newdata.num_atom_each_file.resize(nfiles); for (int file = 0; file < nfiles; file++) { int nconfigs = data.num_config[file]; selected[file] = select(nconfigs, fraction, randomize); - int ns = (int) selected[file].size(); // number of selected configurations + int ns = (int) selected[file].size(); // number of selected configurations newdata.num_config[file] = ns; int num_atom_sum = 0; - for (int ii=0; ii < ns; ii++) { // loop over each selected configuration in a file - int ci = data.num_config_cumsum[file] + selected[file][ii] - 1; + for (int ii = 0; ii < ns; ii++) { // loop over each selected configuration in a file + int ci = data.num_config_cumsum[file] + selected[file][ii] - 1; int natom = data.num_atom[ci]; newdata.num_atom.push_back(natom); num_atom_sum += natom; @@ -809,31 +891,31 @@ void FitPOD::select_data(datastruct &newdata, const datastruct &data) int len = newdata.num_atom.size(); newdata.num_atom_min = podArrayMin(&newdata.num_atom[0], len); newdata.num_atom_max = podArrayMax(&newdata.num_atom[0], len); - newdata.num_atom_cumsum.resize(len+1); - podCumsum(&newdata.num_atom_cumsum[0], &newdata.num_atom[0], len+1); + newdata.num_atom_cumsum.resize(len + 1); + podCumsum(&newdata.num_atom_cumsum[0], &newdata.num_atom[0], len + 1); newdata.num_atom_sum = newdata.num_atom_cumsum[len]; - podCumsum(&newdata.num_config_cumsum[0], &newdata.num_config[0], nfiles+1); + podCumsum(&newdata.num_config_cumsum[0], &newdata.num_config[0], nfiles + 1); newdata.num_config_sum = newdata.num_atom.size(); int n = newdata.num_config_sum; - memory->create(newdata.lattice, 9*n, "fitpod:newdata_lattice"); - memory->create(newdata.stress, 9*n, "fitpod:newdata_stress"); + memory->create(newdata.lattice, 9 * n, "fitpod:newdata_lattice"); + memory->create(newdata.stress, 9 * n, "fitpod:newdata_stress"); memory->create(newdata.energy, n, "fitpod:newdata_energy"); // Group weights have same size as energy. memory->create(newdata.we, n, "fitpod:we"); memory->create(newdata.wf, n, "fitpod:wf"); n = newdata.num_atom_sum; - memory->create(newdata.position, 3*n, "fitpod:newdata_position"); - memory->create(newdata.force, 3*n, "fitpod:newdata_force"); + memory->create(newdata.position, 3 * n, "fitpod:newdata_position"); + memory->create(newdata.force, 3 * n, "fitpod:newdata_force"); memory->create(newdata.atomtype, n, "fitpod:newdata_atomtype"); int cn = 0; int dim = 3; for (int file = 0; file < nfiles; file++) { - int ns = (int) selected[file].size(); // number of selected configurations - for (int ii=0; ii < ns; ii++) { // loop over each selected configuration in a file - int ci = data.num_config_cumsum[file] + selected[file][ii] - 1; + int ns = (int) selected[file].size(); // number of selected configurations + for (int ii = 0; ii < ns; ii++) { // loop over each selected configuration in a file + int ci = data.num_config_cumsum[file] + selected[file][ii] - 1; int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; @@ -841,25 +923,27 @@ void FitPOD::select_data(datastruct &newdata, const datastruct &data) int natomnew_cumsum = newdata.num_atom_cumsum[cn]; if (natom != natomnew) - error->all(FLERR,"number of atoms in the new data set must be the same as that in the old data set."); + error->all( + FLERR, + "number of atoms in the new data set must be the same as that in the old data set."); int *atomtype = &data.atomtype[natom_cumsum]; - double *position = &data.position[dim*natom_cumsum]; - double *force = &data.force[dim*natom_cumsum]; + double *position = &data.position[dim * natom_cumsum]; + double *force = &data.force[dim * natom_cumsum]; newdata.energy[cn] = data.energy[ci]; newdata.we[cn] = data.we[ci]; newdata.wf[cn] = data.wf[ci]; - for (int j=0; j<9; j++) { - newdata.stress[j+9*cn] = data.stress[j+9*ci]; - newdata.lattice[j+9*cn] = data.lattice[j+9*ci]; + for (int j = 0; j < 9; j++) { + newdata.stress[j + 9 * cn] = data.stress[j + 9 * ci]; + newdata.lattice[j + 9 * cn] = data.lattice[j + 9 * ci]; } - for (int na=0; name == 0) - utils::logmesg(lmp, "{:-<{}}\n {:^{}} | # configs (selected) | # atoms (selected) " + utils::logmesg(lmp, + "{:-<{}}\n {:^{}} | # configs (selected) | # atoms (selected) " "| # configs (original) | # atoms (original)\n{:-<{}}\n", - "", maxname+90, "data_file", maxname, "", maxname+90); - for (int i=0; i< (int) newdata.data_files.size(); i++) { - std::string filename = newdata.data_files[i].substr(newdata.data_path.size()+1,newdata.data_files[i].size()); + "", maxname + 90, "data_file", maxname, "", maxname + 90); + for (int i = 0; i < (int) newdata.data_files.size(); i++) { + std::string filename = + newdata.data_files[i].substr(newdata.data_path.size() + 1, newdata.data_files[i].size()); newdata.filenames.emplace_back(filename.c_str()); if (comm->me == 0) - utils::logmesg(lmp, " {:<{}} | {:>8} | {:>8} | {:>8} | {:>8}\n", - newdata.filenames[i], maxname, newdata.num_config[i], newdata.num_atom_each_file[i], - data.num_config[i], data.num_atom_each_file[i]); + utils::logmesg( + lmp, " {:<{}} | {:>8} | {:>8} | {:>8} | {:>8}\n", + newdata.filenames[i], maxname, newdata.num_config[i], newdata.num_atom_each_file[i], + data.num_config[i], data.num_atom_each_file[i]); } if (comm->me == 0) { - utils::logmesg(lmp, "{:-<{}}\nnumber of files: {}\n", "", maxname+90, newdata.data_files.size()); - utils::logmesg(lmp, "number of configurations in all files (selected and original): {} and {}\n", newdata.num_config_sum, data.num_config_sum); - utils::logmesg(lmp, "number of atoms in all files (selected and original: {} and {}\n", newdata.num_atom_sum, data.num_atom_sum); + utils::logmesg(lmp, "{:-<{}}\nnumber of files: {}\n", "", maxname + 90, + newdata.data_files.size()); + utils::logmesg(lmp, + "number of configurations in all files (selected and original): {} and {}\n", + newdata.num_config_sum, data.num_config_sum); + utils::logmesg(lmp, "number of atoms in all files (selected and original: {} and {}\n", + newdata.num_atom_sum, data.num_atom_sum); } } -void FitPOD::read_data_files(const std::string& data_file, const std::vector& species) +void FitPOD::read_data_files(const std::string &data_file, const std::vector &species) { datastruct data; // read data input file to datastruct - data.precision = read_data_file(data.fitting_weights, data.file_format, data.file_extension, envdata.data_path, - testdata.data_path, data.data_path, data.filenametag, data_file, data.group_weight_type, data.we_map, data.wf_map); + data.precision = + read_data_file(data.fitting_weights, data.file_format, data.file_extension, envdata.data_path, + testdata.data_path, data.data_path, data.filenametag, data_file, + data.group_weight_type, data.we_map, data.wf_map); data.training_analysis = (int) data.fitting_weights[3]; data.test_analysis = (int) data.fitting_weights[4]; @@ -914,7 +1007,7 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vector 1) get_data(traindata, species); else - error->all(FLERR,"data set is not found"); + error->all(FLERR, "data set is not found"); if (comm->me == 0) utils::logmesg(lmp, "**************** End of Training Data Set ****************\n"); } else { @@ -923,7 +1016,7 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vector 1) get_data(data, species); else - error->all(FLERR,"data set is not found"); + error->all(FLERR, "data set is not found"); if (comm->me == 0) utils::logmesg(lmp, "**************** End of Training Data Set ****************\n"); @@ -943,26 +1036,29 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vector 1) && (desc.nClusters > 1)) { - envdata.filenametag = traindata.filenametag; + envdata.filenametag = traindata.filenametag; envdata.file_format = traindata.file_format; envdata.file_extension = traindata.file_extension; int tmp = compute_descriptors; compute_descriptors = 1; if (comm->me == 0) - utils::logmesg(lmp, "**************** Begin of Environment Configuration Set ****************\n"); + utils::logmesg(lmp, + "**************** Begin of Environment Configuration Set ****************\n"); get_data(envdata, species); if (comm->me == 0) - utils::logmesg(lmp, "**************** End of Environment Configuration Set ****************\n"); + utils::logmesg(lmp, + "**************** End of Environment Configuration Set ****************\n"); compute_descriptors = tmp; } - if ((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && (traindata.fraction == 1.0)) { + if ((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && + (traindata.fraction == 1.0)) { testdata.data_path = traindata.data_path; - } - else if (((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) && (testdata.test_analysis)) { + } else if (((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) && + (testdata.test_analysis)) { testdata.training = 0; testdata.file_format = traindata.file_format; testdata.file_extension = traindata.file_extension; @@ -977,8 +1073,7 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vectorme == 0) utils::logmesg(lmp, "**************** End of Test Data Set ****************\n"); - } - else { + } else { datastruct datatm; testdata.copydatainfo(datatm); @@ -1001,65 +1096,65 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vectordestroy(datatm.force); memory->destroy(datatm.atomtype); } - } - else { + } else { testdata.data_path = traindata.data_path; } } -int FitPOD::latticecoords(double *y, int *alist, double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx) +int FitPOD::latticecoords(double *y, int *alist, double *x, double *a1, double *a2, double *a3, + double rcut, int *pbc, int nx) { - int m=0, n=0, p=0; - if (pbc[0] == 1) m = (int) ceil(rcut/a1[0]); - if (pbc[1] == 1) n = (int) ceil(rcut/a2[1]); - if (pbc[2] == 1) p = (int) ceil(rcut/a3[2]); + int m = 0, n = 0, p = 0; + if (pbc[0] == 1) m = (int) ceil(rcut / a1[0]); + if (pbc[1] == 1) n = (int) ceil(rcut / a2[1]); + if (pbc[2] == 1) p = (int) ceil(rcut / a3[2]); // index for the center lattice - int ind = m + (2*m+1)*(n) + (2*m+1)*(2*n+1)*(p); + int ind = m + (2 * m + 1) * (n) + (2 * m + 1) * (2 * n + 1) * (p); // number of lattices - int nl = (2*m+1)*(2*n+1)*(2*p+1); + int nl = (2 * m + 1) * (2 * n + 1) * (2 * p + 1); - for (int j=0; j<3*nx; j++) - y[j] = x[j]; + for (int j = 0; j < 3 * nx; j++) y[j] = x[j]; int q = nx; - for (int i = 0; i < (2*p+1); i++) - for (int j = 0; j < (2*n+1); j++) - for (int k = 0; k < (2*m+1); k++) { - int ii = k + (2*m+1)*j + (2*m+1)*(2*n+1)*i; + for (int i = 0; i < (2 * p + 1); i++) + for (int j = 0; j < (2 * n + 1); j++) + for (int k = 0; k < (2 * m + 1); k++) { + int ii = k + (2 * m + 1) * j + (2 * m + 1) * (2 * n + 1) * i; if (ii != ind) { - double x0 = a1[0]*(k - m) + a2[0]*(j - n) + a3[0]*(i - p); - double x1 = a1[1]*(k - m) + a2[1]*(j - n) + a3[1]*(i - p); - double x2 = a1[2]*(k - m) + a2[2]*(j - n) + a3[2]*(i - p); - for (int jj=0; jj SMALL) && (rijsq <= rcutsq)) { + for (int j = 0; j < N; j++) { + double *rj = &r[dim * j]; + double rijsq = (ri[0] - rj[0]) * (ri[0] - rj[0]) + (ri[1] - rj[1]) * (ri[1] - rj[1]) + + (ri[2] - rj[2]) * ((ri[2] - rj[2])); + if ((rijsq > SMALL) && (rijsq <= rcutsq)) { inc += 1; neighlist[k] = j; k += 1; @@ -1070,53 +1165,54 @@ int FitPOD::podneighborlist(int *neighlist, int *numneigh, double *r, double rcu return k; } -int FitPOD::podfullneighborlist(double *y, int *alist, int *neighlist, int *numneigh, int *numneighsum, - double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx) +int FitPOD::podfullneighborlist(double *y, int *alist, int *neighlist, int *numneigh, + int *numneighsum, double *x, double *a1, double *a2, double *a3, + double rcut, int *pbc, int nx) { - double rcutsq = rcut*rcut; + double rcutsq = rcut * rcut; int dim = 3, nl = 0, nn = 0; // number of lattices nl = latticecoords(y, alist, x, a1, a2, a3, rcut, pbc, nx); - int N = nx*nl; + int N = nx * nl; // total number of neighbors nn = podneighborlist(neighlist, numneigh, y, rcutsq, nx, N, dim); - podCumsum(numneighsum, numneigh, nx+1); + podCumsum(numneighsum, numneigh, nx + 1); return nn; } -void FitPOD::estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, int nelements) +void FitPOD::estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, + int nelements) { int dim = 3; int natom_max = data.num_atom_max; - int m=0, n=0, p=0, nl=0, ny=0, na=0, np=0; + int m = 0, n = 0, p = 0, nl = 0, ny = 0, na = 0, np = 0; - for (int ci=0; ci<(int) data.num_atom.size(); ci++) - { + for (int ci = 0; ci < (int) data.num_atom.size(); ci++) { int natom = data.num_atom[ci]; - double *lattice = &data.lattice[9*ci]; + double *lattice = &data.lattice[9 * ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; - if (pbc[0] == 1) m = (int) ceil(rcut/a1[0]); - if (pbc[1] == 1) n = (int) ceil(rcut/a2[1]); - if (pbc[2] == 1) p = (int) ceil(rcut/a3[2]); + if (pbc[0] == 1) m = (int) ceil(rcut / a1[0]); + if (pbc[1] == 1) n = (int) ceil(rcut / a2[1]); + if (pbc[2] == 1) p = (int) ceil(rcut / a3[2]); // number of lattices - nl = (2*m+1)*(2*n+1)*(2*p+1); - ny = MAX(ny,dim*natom*nl); - na = MAX(na, natom*nl); - np = MAX(np, natom*natom*nl); + nl = (2 * m + 1) * (2 * n + 1) * (2 * p + 1); + ny = MAX(ny, dim * natom * nl); + na = MAX(na, natom * nl); + np = MAX(np, natom * natom * nl); } nb.natom_max = MAX(nb.natom_max, natom_max); - nb.sze = nelements*nelements; + nb.sze = nelements * nelements; nb.sza = MAX(nb.sza, na); nb.szy = MAX(nb.szy, ny); nb.szp = MAX(nb.szp, np); @@ -1127,20 +1223,20 @@ void FitPOD::allocate_memory_neighborstruct() memory->create(nb.y, nb.szy, "fitpod:nb_y"); memory->create(nb.alist, nb.sza, "fitpod:nb_alist"); memory->create(nb.pairnum, nb.natom_max, "fitpod:nb_pairnum"); - memory->create(nb.pairnum_cumsum, nb.natom_max+1, "fitpod:nb_pairnum_cumsum"); + memory->create(nb.pairnum_cumsum, nb.natom_max + 1, "fitpod:nb_pairnum_cumsum"); memory->create(nb.pairlist, nb.szp, "fitpod:nb_pairlist"); } void FitPOD::allocate_memory_descriptorstruct(int nCoeffAll) { - memory->create(desc.bd, nb.natom_max*fastpodptr->Mdesc, "fitpod:desc_ld"); - memory->create(desc.pd, nb.natom_max*fastpodptr->nClusters, "fitpod:desc_ld"); + memory->create(desc.bd, nb.natom_max * fastpodptr->Mdesc, "fitpod:desc_ld"); + memory->create(desc.pd, nb.natom_max * fastpodptr->nClusters, "fitpod:desc_ld"); memory->create(desc.gd, nCoeffAll, "fitpod:desc_gd"); - memory->create(desc.A, nCoeffAll*nCoeffAll, "fitpod:desc_A"); + memory->create(desc.A, nCoeffAll * nCoeffAll, "fitpod:desc_A"); memory->create(desc.b, nCoeffAll, "fitpod:desc_b"); memory->create(desc.c, nCoeffAll, "fitpod:desc_c"); memory->create(desc.gdd, desc.szd, "fitpod:desc_gdd"); - podArraySetValue(desc.A, 0.0, nCoeffAll*nCoeffAll); + podArraySetValue(desc.A, 0.0, nCoeffAll * nCoeffAll); podArraySetValue(desc.b, 0.0, nCoeffAll); podArraySetValue(desc.c, 0.0, nCoeffAll); @@ -1161,22 +1257,22 @@ void FitPOD::estimate_memory_fastpod(const datastruct &data) int *pbc = fastpodptr->pbc; double rcut = fastpodptr->rcut; - int Nij=0, Nijmax=0; - for (int ci=0; ci<(int) data.num_atom.size(); ci++) - { + int Nij = 0, Nijmax = 0; + for (int ci = 0; ci < (int) data.num_atom.size(); ci++) { int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; - double *x = &data.position[dim*natom_cumsum]; - double *lattice = &data.lattice[9*ci]; + double *x = &data.position[dim * natom_cumsum]; + double *lattice = &data.lattice[9 * ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; - Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, x, a1, a2, a3, rcut, pbc, natom); + Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, x, a1, a2, + a3, rcut, pbc, natom); Nijmax = MAX(Nijmax, Nij); } - desc.szd = MAX(desc.szd, 3*Nijmax*fastpodptr->nCoeffAll); + desc.szd = MAX(desc.szd, 3 * Nijmax * fastpodptr->nCoeffAll); } void FitPOD::local_descriptors_fastpod(const datastruct &data, int ci) @@ -1188,23 +1284,22 @@ void FitPOD::local_descriptors_fastpod(const datastruct &data, int ci) int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int *atomtype = &data.atomtype[natom_cumsum]; - double *position = &data.position[dim*natom_cumsum]; - double *lattice = &data.lattice[9*ci]; + double *position = &data.position[dim * natom_cumsum]; + double *lattice = &data.lattice[9 * ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; // neighbor list - podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, - position, a1, a2, a3, rcut, pbc, natom); + podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, position, a1, a2, + a3, rcut, pbc, natom); if (desc.nClusters > 1) { - fastpodptr->descriptors(desc.gd, desc.gdd, desc.bd, desc.pd, nb.y, atomtype, nb.alist, nb.pairlist, - nb.pairnum_cumsum, natom); - } - else { + fastpodptr->descriptors(desc.gd, desc.gdd, desc.bd, desc.pd, nb.y, atomtype, nb.alist, + nb.pairlist, nb.pairnum_cumsum, natom); + } else { fastpodptr->descriptors(desc.gd, desc.gdd, desc.bd, nb.y, atomtype, nb.alist, nb.pairlist, - nb.pairnum_cumsum, natom); + nb.pairnum_cumsum, natom); } } @@ -1217,18 +1312,18 @@ void FitPOD::base_descriptors_fastpod(const datastruct &data, int ci) int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int *atomtype = &data.atomtype[natom_cumsum]; - double *position = &data.position[dim*natom_cumsum]; - double *lattice = &data.lattice[9*ci]; + double *position = &data.position[dim * natom_cumsum]; + double *lattice = &data.lattice[9 * ci]; double *a1 = &lattice[0]; double *a2 = &lattice[3]; double *a3 = &lattice[6]; // neighbor list - podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, - position, a1, a2, a3, rcut, pbc, natom); + podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, position, a1, a2, + a3, rcut, pbc, natom); - fastpodptr->base_descriptors(desc.bd, nb.y, atomtype, nb.alist, nb.pairlist, - nb.pairnum_cumsum, natom); + fastpodptr->base_descriptors(desc.bd, nb.y, atomtype, nb.alist, nb.pairlist, nb.pairnum_cumsum, + natom); } void FitPOD::descriptors_calculation(const datastruct &data) @@ -1239,11 +1334,10 @@ void FitPOD::descriptors_calculation(const datastruct &data) // loop over each configuration in the training data set double sz[2]; - for (int ci=0; ci < (int) data.num_atom.size(); ci++) { + for (int ci = 0; ci < (int) data.num_atom.size(); ci++) { - if ((ci % 100)==0) { - if (comm->me == 0) - utils::logmesg(lmp, "Configuration: # {}\n", ci+1); + if ((ci % 100) == 0) { + if (comm->me == 0) utils::logmesg(lmp, "Configuration: # {}\n", ci + 1); } if ((ci % comm->nprocs) == comm->me) { @@ -1251,33 +1345,39 @@ void FitPOD::descriptors_calculation(const datastruct &data) // compute local POD descriptors local_descriptors_fastpod(data, ci); - std::string filename0 = data.data_path + "/basedescriptors_config" + std::to_string(ci+1) + ".bin"; + std::string filename0 = + data.data_path + "/basedescriptors_config" + std::to_string(ci + 1) + ".bin"; FILE *fp0 = fopen(filename0.c_str(), "wb"); sz[0] = (double) data.num_atom[ci]; sz[1] = (double) fastpodptr->Mdesc; - fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp0); - fwrite( reinterpret_cast( desc.bd ), sizeof(double) * (data.num_atom[ci]*fastpodptr->Mdesc), 1, fp0); + fwrite(reinterpret_cast(sz), sizeof(double) * (2), 1, fp0); + fwrite(reinterpret_cast(desc.bd), + sizeof(double) * (data.num_atom[ci] * fastpodptr->Mdesc), 1, fp0); fclose(fp0); - if (desc.nClusters>1) { - std::string filename1 = data.data_path + "/environmentdescriptors_config" + std::to_string(ci+1) + ".bin"; + if (desc.nClusters > 1) { + std::string filename1 = + data.data_path + "/environmentdescriptors_config" + std::to_string(ci + 1) + ".bin"; FILE *fp1 = fopen(filename1.c_str(), "wb"); sz[0] = (double) data.num_atom[ci]; sz[1] = (double) fastpodptr->nClusters; - fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp1); - fwrite( reinterpret_cast( desc.pd ), sizeof(double) * (data.num_atom[ci]*fastpodptr->nClusters), 1, fp1); + fwrite(reinterpret_cast(sz), sizeof(double) * (2), 1, fp1); + fwrite(reinterpret_cast(desc.pd), + sizeof(double) * (data.num_atom[ci] * fastpodptr->nClusters), 1, fp1); fclose(fp1); } - std::string filename = data.data_path + "/globaldescriptors_config" + std::to_string(ci+1) + ".bin"; + std::string filename = + data.data_path + "/globaldescriptors_config" + std::to_string(ci + 1) + ".bin"; FILE *fp = fopen(filename.c_str(), "wb"); sz[0] = (double) data.num_atom[ci]; sz[1] = (double) desc.nCoeffAll; - fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp); - fwrite( reinterpret_cast( desc.gd ), sizeof(double) * (desc.nCoeffAll), 1, fp); - if (compute_descriptors==2) { - fwrite( reinterpret_cast( desc.gdd ), sizeof(double) * (3*data.num_atom[ci]*desc.nCoeffAll), 1, fp); + fwrite(reinterpret_cast(sz), sizeof(double) * (2), 1, fp); + fwrite(reinterpret_cast(desc.gd), sizeof(double) * (desc.nCoeffAll), 1, fp); + if (compute_descriptors == 2) { + fwrite(reinterpret_cast(desc.gdd), + sizeof(double) * (3 * data.num_atom[ci] * desc.nCoeffAll), 1, fp); } fclose(fp); } @@ -1290,18 +1390,19 @@ void FitPOD::descriptors_calculation(const datastruct &data) void FitPOD::environment_cluster_calculation(const datastruct &data) { if (comm->me == 0) - utils::logmesg(lmp, "**************** Begin Calculating Environment Descriptor Matrix ****************\n"); + utils::logmesg( + lmp, "**************** Begin Calculating Environment Descriptor Matrix ****************\n"); int nComponents = fastpodptr->nComponents; int Mdesc = fastpodptr->Mdesc; int nClusters = fastpodptr->nClusters; int nelements = fastpodptr->nelements; - memory->create(fastpodptr->Centroids, nClusters*nComponents*nelements, "fitpod:centroids"); - memory->create(fastpodptr->Proj, Mdesc*nComponents*nelements, "fitpod:P"); + memory->create(fastpodptr->Centroids, nClusters * nComponents * nelements, "fitpod:centroids"); + memory->create(fastpodptr->Proj, Mdesc * nComponents * nelements, "fitpod:P"); int nAtoms = 0; int nTotalAtoms = 0; - for (int ci=0; ci < (int) data.num_atom.size(); ci++) { + for (int ci = 0; ci < (int) data.num_atom.size(); ci++) { if ((ci % comm->nprocs) == comm->me) nAtoms += data.num_atom[ci]; nTotalAtoms += data.num_atom[ci]; } @@ -1318,16 +1419,16 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) int *nElemAtomsCumSum; int *nElemAtomsCount; - memory->create(basedescmatrix, nAtoms*Mdesc, "fitpod:basedescmatrix"); - memory->create(pca, nAtoms*nComponents, "fitpod:pca"); - memory->create(A, Mdesc*Mdesc, "fitpod:A"); - memory->create(work, Mdesc*Mdesc, "fitpod:work"); + memory->create(basedescmatrix, nAtoms * Mdesc, "fitpod:basedescmatrix"); + memory->create(pca, nAtoms * nComponents, "fitpod:pca"); + memory->create(A, Mdesc * Mdesc, "fitpod:A"); + memory->create(work, Mdesc * Mdesc, "fitpod:work"); memory->create(b, Mdesc, "fitpod:b"); - memory->create(Lambda, Mdesc*nelements, "fitpod:Lambda"); - memory->create(clusterSizes, nClusters*nelements, "fitpod:clusterSizes"); + memory->create(Lambda, Mdesc * nelements, "fitpod:Lambda"); + memory->create(clusterSizes, nClusters * nelements, "fitpod:clusterSizes"); memory->create(assignments, nAtoms, "fitpod:assignments"); memory->create(nElemAtoms, nelements, "fitpod:nElemAtoms"); - memory->create(nElemAtomsCumSum, 1+nelements, "fitpod:nElemAtomsCumSum"); + memory->create(nElemAtomsCumSum, 1 + nelements, "fitpod:nElemAtomsCumSum"); memory->create(nElemAtomsCount, nelements, "fitpod:nElemAtomsCount"); char chn = 'N'; @@ -1336,110 +1437,123 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) char chu = 'U'; double alpha = 1.0, beta = 0.0; - for (int elem=0; elem < nelements; elem++) { - nElemAtoms[elem] = 0; // number of atoms for this element + for (int elem = 0; elem < nelements; elem++) { + nElemAtoms[elem] = 0; // number of atoms for this element } - for (int ci=0; ci < (int) data.num_atom.size(); ci++) { + for (int ci = 0; ci < (int) data.num_atom.size(); ci++) { if ((ci % comm->nprocs) == comm->me) { - int natom = data.num_atom[ci]; - int natom_cumsum = data.num_atom_cumsum[ci]; - int *atomtype = &data.atomtype[natom_cumsum]; - for (int n=0; nme == 0) - utils::logmesg(lmp, "Configuration: # {}\n", ci+1); + // loop over each configuration in the data set + for (int ci = 0; ci < (int) data.num_atom.size(); ci++) { + if ((ci % 100) == 0) { + if (comm->me == 0) utils::logmesg(lmp, "Configuration: # {}\n", ci + 1); } if ((ci % comm->nprocs) == comm->me) { base_descriptors_fastpod(data, ci); // basedescmatrix is a Mdesc x nAtoms matrix - int natom = data.num_atom[ci]; + int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int *atomtype = &data.atomtype[natom_cumsum]; - for (int n=0; nProj[nComponents*Mdesc*elem]; - double *centroids = &fastpodptr->Centroids[nComponents*nClusters*elem]; + double *descmatrix = &basedescmatrix[Mdesc * nElemAtomsCumSum[elem]]; + double *Proj = &fastpodptr->Proj[nComponents * Mdesc * elem]; + double *centroids = &fastpodptr->Centroids[nComponents * nClusters * elem]; // Calculate covariance matrix A = basedescmatrix*basedescmatrix'. A is a Mdesc x Mdesc matrix - DGEMM(&chn, &cht, &Mdesc, &Mdesc, &nAtoms, &alpha, descmatrix, &Mdesc, descmatrix, &Mdesc, &beta, A, &Mdesc); - MPI_Allreduce(MPI_IN_PLACE, A, Mdesc*Mdesc, MPI_DOUBLE, MPI_SUM, world); + DGEMM(&chn, &cht, &Mdesc, &Mdesc, &nAtoms, &alpha, descmatrix, &Mdesc, descmatrix, &Mdesc, + &beta, A, &Mdesc); + MPI_Allreduce(MPI_IN_PLACE, A, Mdesc * Mdesc, MPI_DOUBLE, MPI_SUM, world); //if (comm->me == 0) print_matrix("A", Mdesc, Mdesc, A, Mdesc); - if ((comm->me == 0) && (save==1)) - savematrix2binfile(data.filenametag + "_covariance_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc); + if ((comm->me == 0) && (save == 1)) + savematrix2binfile(data.filenametag + "_covariance_matrix_elem" + std::to_string(elem + 1) + + ".bin", + A, Mdesc, Mdesc); // Calculate eigenvalues and eigenvectors of A - int lwork = Mdesc * Mdesc; // the length of the array work, lwork >= max(1,3*N-1) - int info = 1; // = 0: successful exit + int lwork = Mdesc * Mdesc; // the length of the array work, lwork >= max(1,3*N-1) + int info = 1; // = 0: successful exit DSYEV(&chv, &chu, &Mdesc, A, &Mdesc, b, work, &lwork, &info); // order eigenvalues and eigenvectors from largest to smallest - for (int i=0; ime == 0) { - savematrix2binfile(data.filenametag + "_eigenvector_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc); - savematrix2binfile(data.filenametag + "_eigenvalues_elem" + std::to_string(elem+1) + ".bin", b, Mdesc, 1); + savematrix2binfile(data.filenametag + "_eigenvector_matrix_elem" + + std::to_string(elem + 1) + ".bin", + A, Mdesc, Mdesc); + savematrix2binfile(data.filenametag + "_eigenvalues_elem" + std::to_string(elem + 1) + + ".bin", + b, Mdesc, 1); } - savematrix2binfile(data.filenametag + "_desc_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", descmatrix, Mdesc, nAtoms); - savematrix2binfile(data.filenametag + "_pca_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", pca, nComponents, nAtoms); - saveintmatrix2binfile(data.filenametag + "_cluster_assignments_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", assignments, nAtoms, 1); + savematrix2binfile(data.filenametag + "_desc_matrix_elem" + std::to_string(elem + 1) + + "_proc" + std::to_string(comm->me + 1) + ".bin", + descmatrix, Mdesc, nAtoms); + savematrix2binfile(data.filenametag + "_pca_matrix_elem" + std::to_string(elem + 1) + + "_proc" + std::to_string(comm->me + 1) + ".bin", + pca, nComponents, nAtoms); + saveintmatrix2binfile(data.filenametag + "_cluster_assignments_elem" + + std::to_string(elem + 1) + "_proc" + std::to_string(comm->me + 1) + + ".bin", + assignments, nAtoms, 1); } } @@ -1456,7 +1570,8 @@ void FitPOD::environment_cluster_calculation(const datastruct &data) memory->destroy(nElemAtomsCount); if (comm->me == 0) - utils::logmesg(lmp, "**************** End Calculating Environment Descriptor Matrix ****************\n"); + utils::logmesg( + lmp, "**************** End Calculating Environment Descriptor Matrix ****************\n"); } void FitPOD::least_squares_matrix(const datastruct &data, int ci) @@ -1465,21 +1580,21 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci) int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; int nCoeffAll = desc.nCoeffAll; - int nforce = dim*natom; + int nforce = dim * natom; // compute energy weight and force weight double normconst = 1.0; - if (data.normalizeenergy==1) normconst = 1.0/natom; + if (data.normalizeenergy == 1) normconst = 1.0 / natom; double we = data.we[ci]; double wf = data.wf[ci]; - double we2 = (we*we)*(normconst*normconst); - double wf2 = (wf*wf); + double we2 = (we * we) * (normconst * normconst); + double wf2 = (wf * wf); // get energy and force from the training data set double energy = data.energy[ci]; - double *force = &data.force[dim*natom_cumsum]; + double *force = &data.force[dim * natom_cumsum]; // least-square matrix for all descriptors: A = A + (we*we)*(gd^T * gd) @@ -1491,13 +1606,13 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci) char chn = 'N'; double one = 1.0; int inc1 = 1; - DGEMM(&cht, &chn, &nCoeffAll, &nCoeffAll, &nforce, &wf2, desc.gdd, &nforce, desc.gdd, &nforce, &one, desc.A, &nCoeffAll); + DGEMM(&cht, &chn, &nCoeffAll, &nCoeffAll, &nforce, &wf2, desc.gdd, &nforce, desc.gdd, &nforce, + &one, desc.A, &nCoeffAll); // least-square vector for all descriptors: b = b + (we*we*energy)*gd - double wee = we2*energy; - for (int i = 0; i< nCoeffAll; i++) - desc.b[i] += wee*desc.gd[i]; + double wee = we2 * energy; + for (int i = 0; i < nCoeffAll; i++) desc.b[i] += wee * desc.gd[i]; // least-square vector for all descriptors derivatives: b = b + (wf*wf) * (gdd^T * f) @@ -1511,11 +1626,10 @@ void FitPOD::least_squares_fit(const datastruct &data) // loop over each configuration in the training data set - for (int ci=0; ci < (int) data.num_atom.size(); ci++) { + for (int ci = 0; ci < (int) data.num_atom.size(); ci++) { - if ((ci % 100)==0) { - if (comm->me == 0) - utils::logmesg(lmp, "Configuration: # {}\n", ci+1); + if ((ci % 100) == 0) { + if (comm->me == 0) utils::logmesg(lmp, "Configuration: # {}\n", ci + 1); } if ((ci % comm->nprocs) == comm->me) { @@ -1524,11 +1638,13 @@ void FitPOD::least_squares_fit(const datastruct &data) local_descriptors_fastpod(data, ci); if (save_descriptors > 0) { - std::string filename = data.data_path + "/descriptors_config" + std::to_string(ci+1) + ".bin"; + std::string filename = + data.data_path + "/descriptors_config" + std::to_string(ci + 1) + ".bin"; FILE *fp = fopen(filename.c_str(), "wb"); - fwrite( reinterpret_cast( desc.gd ), sizeof(double) * (desc.nCoeffAll), 1, fp); - if (save_descriptors==2) { - fwrite( reinterpret_cast( desc.gdd ), sizeof(double) * (3*data.num_atom[ci]*desc.nCoeffAll), 1, fp); + fwrite(reinterpret_cast(desc.gd), sizeof(double) * (desc.nCoeffAll), 1, fp); + if (save_descriptors == 2) { + fwrite(reinterpret_cast(desc.gdd), + sizeof(double) * (3 * data.num_atom[ci] * desc.nCoeffAll), 1, fp); } fclose(fp); } @@ -1542,31 +1658,32 @@ void FitPOD::least_squares_fit(const datastruct &data) int nCoeffAll = desc.nCoeffAll; MPI_Allreduce(MPI_IN_PLACE, desc.b, nCoeffAll, MPI_DOUBLE, MPI_SUM, world); - MPI_Allreduce(MPI_IN_PLACE, desc.A, nCoeffAll*nCoeffAll, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(MPI_IN_PLACE, desc.A, nCoeffAll * nCoeffAll, MPI_DOUBLE, MPI_SUM, world); if (comm->me == 0) { // symmetrize A - for (int i = 0; ienergyforce(force, nb.y, atomtype, nb.alist, nb.pairlist, - nb.pairnum_cumsum, natom); + nb.pairnum_cumsum, natom); return energy; } void FitPOD::print_analysis(const datastruct &data, double *outarray, double *errors) { - int nfiles = data.data_files.size(); // number of files + int nfiles = data.data_files.size(); // number of files int lm = 10; - for (int i = 0; i < nfiles; i++) - lm = MAX(lm, (int) data.filenames[i].size()); + for (int i = 0; i < nfiles; i++) lm = MAX(lm, (int) data.filenames[i].size()); lm = lm + 2; - std::string filename_errors = fmt::format("{}_{}_errors.pod", data.filenametag, data.training ? "training" : "test"); - std::string filename_analysis = fmt::format("{}_{}_analysis.pod", data.filenametag, data.training ? "training" : "test"); + std::string filename_errors = + fmt::format("{}_{}_errors.pod", data.filenametag, data.training ? "training" : "test"); + std::string filename_analysis = + fmt::format("{}_{}_analysis.pod", data.filenametag, data.training ? "training" : "test"); FILE *fp_errors = nullptr; FILE *fp_analysis = nullptr; fp_errors = fopen(filename_errors.c_str(), "w"); fp_analysis = fopen(filename_analysis.c_str(), "w"); - std::string mystr = fmt::format("**************** Begin of Error Analysis for the {} Data Set ****************\n", - data.training ? "Training" : "Test"); + std::string mystr = + fmt::format("**************** Begin of Error Analysis for the {} Data Set ****************\n", + data.training ? "Training" : "Test"); utils::logmesg(lmp, mystr); fmt::print(fp_errors, mystr); - std::string sa(lm+80,'-'); + std::string sa(lm + 80, '-'); sa += '\n'; - std::string sb = fmt::format(" {:^{}} | # configs | # atoms | MAE energy | RMSE energy | MAE force | RMSE force\n", - "File", lm); + std::string sb = fmt::format( + " {:^{}} | # configs | # atoms | MAE energy | RMSE energy | MAE force | RMSE force\n", + "File", lm); utils::logmesg(lmp, sa + sb + sa); fmt::print(fp_errors, sa + sb + sa); - int ci=0, m=8, nc=0, nf=0; + int ci = 0, m = 8, nc = 0, nf = 0; for (int file = 0; file < nfiles; file++) { fmt::print(fp_analysis, "# {}\n", data.filenames[file]); - fmt::print(fp_analysis, " config # atoms volume energy DFT energy energy error " + fmt::print(fp_analysis, + " config # atoms volume energy DFT energy energy error " " force DFT force force error\n"); int nforceall = 0; int nconfigs = data.num_config[file]; nc += nconfigs; - for (int ii=0; ii < nconfigs; ii++) { // loop over each configuration in a file - fmt::print(fp_analysis, "{:6} {:8} ", outarray[m*ci], outarray[1 + m*ci]); + for (int ii = 0; ii < nconfigs; ii++) { // loop over each configuration in a file + fmt::print(fp_analysis, "{:6} {:8} ", outarray[m * ci], outarray[1 + m * ci]); - double vol = latticevolume(&data.lattice[9*ci]); + double vol = latticevolume(&data.lattice[9 * ci]); fmt::print(fp_analysis, "{:<15.10} ", vol); - for(int count = 2; count < m; count ++) - fmt::print(fp_analysis, "{:<15.10} ", outarray[count + m*ci]); + for (int count = 2; count < m; count++) + fmt::print(fp_analysis, "{:<15.10} ", outarray[count + m * ci]); fmt::print(fp_analysis, "\n"); - nforceall += 3*data.num_atom[ci]; + nforceall += 3 * data.num_atom[ci]; ci += 1; } nf += nforceall; - int q = file+1; - auto s = fmt::format("{:<{}} {:>10} {:>11} {:<10.6f} {:<10.6f} {:<10.6f} {:<10.6f}\n", - data.filenames[file], lm, nconfigs, nforceall/3, - errors[0 + 4*q], errors[1 + 4*q], errors[2 + 4*q], errors[3 + 4*q]); + int q = file + 1; + auto s = + fmt::format("{:<{}} {:>10} {:>11} {:<10.6f} {:<10.6f} {:<10.6f} {:<10.6f}\n", + data.filenames[file], lm, nconfigs, nforceall / 3, errors[0 + 4 * q], + errors[1 + 4 * q], errors[2 + 4 * q], errors[3 + 4 * q]); utils::logmesg(lmp, s); fmt::print(fp_errors, s); } utils::logmesg(lmp, sa); fmt::print(fp_errors, sa); - auto s = fmt::format("{:<{}} {:>10} {:>11} {:<10.6f} {:<10.6f} {:<10.6f} {:<10.6f}\n", - "All files", lm, nc, nf/3, errors[0], errors[1], errors[2], errors[3]); + auto s = + fmt::format("{:<{}} {:>10} {:>11} {:<10.6f} {:<10.6f} {:<10.6f} {:<10.6f}\n", + "All files", lm, nc, nf / 3, errors[0], errors[1], errors[2], errors[3]); utils::logmesg(lmp, s + sa); fmt::print(fp_errors, "{}", s + sa); - mystr = fmt::format("**************** End of Error Analysis for the {} Data Set ****************\n", - data.training ? "Training" : "Test"); + mystr = + fmt::format("**************** End of Error Analysis for the {} Data Set ****************\n", + data.training ? "Training" : "Test"); utils::logmesg(lmp, mystr); fmt::print(fp_errors, mystr); @@ -1697,67 +1821,62 @@ void FitPOD::error_analysis(const datastruct &data, double *coeff) int dim = 3; int nCoeffAll = desc.nCoeffAll; double energy; - std::vector force(dim*data.num_atom_max); + std::vector force(dim * data.num_atom_max); - int nfiles = data.data_files.size(); // number of files - int num_configs = data.num_atom.size(); // number of configurations in all files + int nfiles = data.data_files.size(); // number of files + int num_configs = data.num_atom.size(); // number of configurations in all files int m = 8; - std::vector outarray(m*num_configs); - for (int i=0; i outarray(m * num_configs); + for (int i = 0; i < m * num_configs; i++) outarray[i] = 0.0; std::vector ssrarray(num_configs); - for (int i=0; i errors(4*(nfiles+1)); - for (int i=0; i<4*(nfiles+1); i++) - errors[i] = 0.0; + std::vector errors(4 * (nfiles + 1)); + for (int i = 0; i < 4 * (nfiles + 1); i++) errors[i] = 0.0; std::vector newcoeff(nCoeffAll); - for (int j=0; jme == 0) utils::logmesg(lmp, "**************** Begin of Error Calculation ****************\n"); - int ci = 0; // configuration counter - for (int file = 0; file < nfiles; file++) { // loop over each file in the training data set + int ci = 0; // configuration counter + for (int file = 0; file < nfiles; file++) { // loop over each file in the training data set int nconfigs = data.num_config[file]; - for (int ii=0; ii < nconfigs; ii++) { // loop over each configuration in a file + for (int ii = 0; ii < nconfigs; ii++) { // loop over each configuration in a file - if ((ci % 100)==0) { - if (comm->me == 0) - utils::logmesg(lmp, "Configuration: # {}\n", ci+1); + if ((ci % 100) == 0) { + if (comm->me == 0) utils::logmesg(lmp, "Configuration: # {}\n", ci + 1); } if ((ci % comm->nprocs) == comm->me) { int natom = data.num_atom[ci]; - int nforce = dim*natom; + int nforce = dim * natom; energy = energyforce_calculation_fastpod(force.data(), data, ci); double DFTenergy = data.energy[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; - double *DFTforce = &data.force[dim*natom_cumsum]; + double *DFTforce = &data.force[dim * natom_cumsum]; - outarray[0 + m*ci] = ci+1; - outarray[1 + m*ci] = natom; - outarray[2 + m*ci] = energy; - outarray[3 + m*ci] = DFTenergy; - outarray[4 + m*ci] = fabs(DFTenergy-energy)/natom; - outarray[5 + m*ci] = podArrayNorm(force.data(), nforce); - outarray[6 + m*ci] = podArrayNorm(DFTforce, nforce); + outarray[0 + m * ci] = ci + 1; + outarray[1 + m * ci] = natom; + outarray[2 + m * ci] = energy; + outarray[3 + m * ci] = DFTenergy; + outarray[4 + m * ci] = fabs(DFTenergy - energy) / natom; + outarray[5 + m * ci] = podArrayNorm(force.data(), nforce); + outarray[6 + m * ci] = podArrayNorm(DFTforce, nforce); double diff, sum = 0.0, ssr = 0.0; - for (int j=0; jme == 0) { utils::logmesg(lmp, "**************** End of Error Calculation ****************\n"); @@ -1822,35 +1942,35 @@ void FitPOD::energyforce_calculation(const datastruct &data) { int dim = 3; double energy; - std::vector force(1+dim*data.num_atom_max); + std::vector force(1 + dim * data.num_atom_max); - int nfiles = data.data_files.size(); // number of files + int nfiles = data.data_files.size(); // number of files if (comm->me == 0) utils::logmesg(lmp, "**************** Begin of Energy/Force Calculation ****************\n"); - int ci = 0; // configuration counter - for (int file = 0; file < nfiles; file++) { // loop over each file in the data set + int ci = 0; // configuration counter + for (int file = 0; file < nfiles; file++) { // loop over each file in the data set int nconfigs = data.num_config[file]; - for (int ii=0; ii < nconfigs; ii++) { // loop over each configuration in a file - if ((ci % 100)==0) { - if (comm->me == 0) utils::logmesg(lmp, "Configuration: # {}\n", ci+1); + for (int ii = 0; ii < nconfigs; ii++) { // loop over each configuration in a file + if ((ci % 100) == 0) { + if (comm->me == 0) utils::logmesg(lmp, "Configuration: # {}\n", ci + 1); } int natom = data.num_atom[ci]; - int nforce = dim*natom; + int nforce = dim * natom; if ((ci % comm->nprocs) == comm->me) { - energy = energyforce_calculation_fastpod(force.data()+1, data, ci); + energy = energyforce_calculation_fastpod(force.data() + 1, data, ci); // save energy and force into a binary file force[0] = energy; - std::string filename = "energyforce_config" + std::to_string(ci+1) + ".bin"; + std::string filename = "energyforce_config" + std::to_string(ci + 1) + ".bin"; FILE *fp = fopen(filename.c_str(), "wb"); - fwrite( reinterpret_cast( force.data() ), sizeof(double) * (1 + nforce), 1, fp); + fwrite(reinterpret_cast(force.data()), sizeof(double) * (1 + nforce), 1, fp); fclose(fp); } @@ -1861,100 +1981,88 @@ void FitPOD::energyforce_calculation(const datastruct &data) utils::logmesg(lmp, "**************** End of Energy/Force Calculation ****************\n"); } -void FitPOD::podArrayFill(int* output, int start, int length) +void FitPOD::podArrayFill(int *output, int start, int length) { - for (int j = 0; j < length; ++j) - output[j] = start + j; + for (int j = 0; j < length; ++j) output[j] = start + j; } double FitPOD::podArraySum(double *a, int n) { double e = a[0]; - for (int i=1; ib) - b = a[i]; + for (int i = 1; i < n; i++) + if (a[i] > b) b = a[i]; return b; } int FitPOD::podArrayMin(int *a, int n) { int b = a[0]; - for (int i=1; ib) - b = a[i]; + for (int i = 1; i < n; i++) + if (a[i] > b) b = a[i]; return b; } void FitPOD::podKron(double *C, double *A, double *B, double alpha, int M1, int M2) { - int M = M1*M2; - for (int idx=0; idx( sz ), sizeof(double) * (2), 1, fp); - fwrite( reinterpret_cast( A ), sizeof(double) * (nrows*ncols), 1, fp); + fwrite(reinterpret_cast(sz), sizeof(double) * (2), 1, fp); + fwrite(reinterpret_cast(A), sizeof(double) * (nrows * ncols), 1, fp); fclose(fp); } @@ -2122,31 +2244,28 @@ void FitPOD::saveintmatrix2binfile(std::string filename, int *A, int nrows, int int sz[2]; sz[0] = nrows; sz[1] = ncols; - fwrite( reinterpret_cast( sz ), sizeof(int) * (2), 1, fp); - fwrite( reinterpret_cast( A ), sizeof(int) * (nrows*ncols), 1, fp); + fwrite(reinterpret_cast(sz), sizeof(int) * (2), 1, fp); + fwrite(reinterpret_cast(A), sizeof(int) * (nrows * ncols), 1, fp); fclose(fp); } -void FitPOD::savedata2textfile(std::string filename, std::string text, double *A, int n, int m, int dim) +void FitPOD::savedata2textfile(std::string filename, std::string text, double *A, int n, int m, + int dim) { if (comm->me == 0) { int precision = 15; FILE *fp = fopen(filename.c_str(), "w"); - if (dim==1) { + if (dim == 1) { fmt::print(fp, text, n); - for (int i = 0; i < n; i++) - fmt::print(fp, "{:<10.{}f} \n", A[i], precision); - } - else if (dim==2) { + for (int i = 0; i < n; i++) fmt::print(fp, "{:<10.{}f} \n", A[i], precision); + } else if (dim == 2) { fmt::print(fp, text, n); fmt::print(fp, "{} \n", m); for (int j = 0; j < n; j++) { - for (int i = 0; i < m; i++) - fmt::print(fp, "{:<10.{}f} ", A[j + i*n], precision); + for (int i = 0; i < m; i++) fmt::print(fp, "{:<10.{}f} ", A[j + i * n], precision); fmt::print(fp, " \n"); } } fclose(fp); } } - diff --git a/src/ML-POD/fitpod_command.h b/src/ML-POD/fitpod_command.h index 96e726e0af..b961ddc224 100644 --- a/src/ML-POD/fitpod_command.h +++ b/src/ML-POD/fitpod_command.h @@ -11,7 +11,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ - #ifdef COMMAND_CLASS // clang-format off CommandStyle(fitpod,FitPOD); @@ -27,20 +26,23 @@ CommandStyle(fitpod,FitPOD); namespace LAMMPS_NS { class FitPOD : public Command { -public: + public: FitPOD(LAMMPS *); void command(int, char **) override; -private: + private: struct datastruct { - std::string file_format = "extxyz"; - std::string file_extension = "xyz"; + datastruct(); + void copydatainfo(datastruct &data) const; + + std::string file_format; + std::string file_extension; std::string data_path; - std::vector data_files; // sorted file names - std::vector group_names; // sorted group names + std::vector data_files; // sorted file names + std::vector group_names; // sorted group names std::vector filenames; - std::string filenametag = "pod"; - std::string group_weight_type = "global"; + std::string filenametag; + std::string group_weight_type; std::vector num_atom; std::vector num_atom_cumsum; @@ -52,86 +54,65 @@ private: int num_atom_max; int num_config_sum; - double *lattice=nullptr; - double *energy=nullptr; - double *stress=nullptr; - double *position=nullptr; - double *force=nullptr; - int *atomtype=nullptr; + double *lattice; + double *energy; + double *stress; + double *position; + double *force; + int *atomtype; // Group weights will have same size as energy. - double *we=nullptr; - double *wf=nullptr; + double *we; + double *wf; - int training = 1; - int normalizeenergy = 1; - int training_analysis = 1; - int test_analysis = 1; - int training_calculation = 0; - int test_calculation = 0; - int randomize = 1; - int precision = 8; - double fraction = 1.0; + int training; + int normalizeenergy; + int training_analysis; + int test_analysis; + int training_calculation; + int test_calculation; + int randomize; + int precision; + double fraction; std::unordered_map we_map; std::unordered_map wf_map; - double fitting_weights[12] = {100.0, 1.0, 0.0, 1, 1, 0, 0, 1, 1, 1, 1, 1e-10}; - - void copydatainfo(datastruct &data) const - { - data.data_path = data_path; - data.file_format = file_format; - data.file_extension = file_extension; - data.data_files = data_files; - data.filenametag = filenametag; - data.filenames = filenames; - data.training_analysis = training_analysis; - data.test_analysis = test_analysis; - data.training_calculation = training_calculation; - data.test_calculation = test_calculation; - data.fraction = fraction; - data.randomize = randomize; - data.precision = precision; - data.training = training; - data.normalizeenergy = normalizeenergy; - for (int i = 0; i < 12; i++) - data.fitting_weights[i] = fitting_weights[i]; - data.we_map = we_map; - data.wf_map = wf_map; - } + double fitting_weights[12]; }; struct neighborstruct { - int *alist=nullptr; - int *pairnum=nullptr; - int *pairnum_cumsum=nullptr; - int *pairlist=nullptr; - double *y=nullptr; + neighborstruct(); - //int natom; - //int nalist; - int natom_max = 0; - int sze = 0; - int sza = 0; - int szy = 0; - int szp = 0; + int *alist; + int *pairnum; + int *pairnum_cumsum; + int *pairlist; + double *y; + + int natom_max; + int sze; + int sza; + int szy; + int szp; }; struct descriptorstruct { - double *bd=nullptr; // base descriptors - double *pd=nullptr; // multi-environment descriptors (probabilities) - double *gd=nullptr; // global descriptors - double *gdd=nullptr; // derivatives of global descriptors and peratom descriptors - double *A=nullptr; // least-square matrix for all descriptors - double *b=nullptr; // least-square vector for all descriptors - double *c=nullptr; // coefficents of descriptors - int szd = 0; - int nCoeffAll = 0; // number of global descriptors - int nClusters = 0; // number of environment clusters + descriptorstruct(); + + double *bd; // base descriptors + double *pd; // multi-environment descriptors (probabilities) + double *gd; // global descriptors + double *gdd; // derivatives of global descriptors and peratom descriptors + double *A; // least-square matrix for all descriptors + double *b; // least-square vector for all descriptors + double *c; // coefficents of descriptors + int szd; + int nCoeffAll; // number of global descriptors + int nClusters; // number of environment clusters }; - int save_descriptors = 0; - int compute_descriptors = 0; + int save_descriptors; + int compute_descriptors; datastruct traindata; datastruct testdata; datastruct envdata; @@ -159,9 +140,12 @@ private: void matrix33_inverse(double *invA, double *A1, double *A2, double *A3); double squareDistance(const double *a, const double *b, int DIMENSIONS); - void assignPointsToClusters(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSION); - void updateCentroids(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS); - void KmeansClustering(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS, int MAX_ITER); + void assignPointsToClusters(double *points, double *centroids, int *assignments, + int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSION); + void updateCentroids(double *points, double *centroids, int *assignments, int *clusterSizes, + int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS); + void KmeansClustering(double *points, double *centroids, int *assignments, int *clusterSizes, + int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS, int MAX_ITER); void savedata2textfile(std::string filename, std::string text, double *A, int n, int m, int dim); void savematrix2binfile(std::string filename, double *A, int nrows, int ncols); @@ -169,26 +153,33 @@ private: // functions for reading input files and fitting - int read_data_file(double *fitting_weights, std::string &file_format, std::string &file_extension, std::string &env_path, - std::string &test_path, std::string &training_path, std::string &filenametag, const std::string &data_file, std::string &group_weight_type, - std::unordered_map &we_map, std::unordered_map &wf_map); - void get_exyz_files(std::vector &, std::vector &, const std::string &, const std::string &); - int get_number_atom_exyz(std::vector& num_atom, int& num_atom_sum, std::string file); - int get_number_atoms(std::vector& num_atom, std::vector &num_atom_sum, std::vector& num_config, std::vector training_files); - void read_exyz_file(double *lattice, double *stress, double *energy, double *we, double *wf, double *pos, double *forces, - int *atomtype, std::string file, std::vector species, double we_group, double wf_group); + int read_data_file(double *fitting_weights, std::string &file_format, std::string &file_extension, + std::string &env_path, std::string &test_path, std::string &training_path, + std::string &filenametag, const std::string &data_file, + std::string &group_weight_type, + std::unordered_map &we_map, + std::unordered_map &wf_map); + void get_exyz_files(std::vector &, std::vector &, const std::string &, + const std::string &); + int get_number_atom_exyz(std::vector &num_atom, int &num_atom_sum, std::string file); + int get_number_atoms(std::vector &num_atom, std::vector &num_atom_sum, + std::vector &num_config, std::vector training_files); + void read_exyz_file(double *lattice, double *stress, double *energy, double *we, double *wf, + double *pos, double *forces, int *atomtype, std::string file, + std::vector species, double we_group, double wf_group); void get_data(datastruct &data, const std::vector &species); std::vector linspace(int start_in, int end_in, int num_in); std::vector shuffle(int start_in, int end_in, int num_in); std::vector select(int n, double fraction, int randomize); void select_data(datastruct &newdata, const datastruct &data); - void read_data_files(const std::string& data_file, const std::vector& species); + void read_data_files(const std::string &data_file, const std::vector &species); int latticecoords(double *y, int *alist, double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx); int podneighborlist(int *neighlist, int *numneigh, double *r, double rcutsq, int nx, int N, int dim); int podfullneighborlist(double *y, int *alist, int *neighlist, int *numneigh, int *numneighsum, - double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx); + double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, + int nx); void estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, int nelements); void allocate_memory_neighborstruct(); void allocate_memory_descriptorstruct(int nd); @@ -204,8 +195,6 @@ private: double energyforce_calculation_fastpod(double *force, const datastruct &data, int ci); void energyforce_calculation(const datastruct &data); }; - } // namespace LAMMPS_NS - #endif #endif