This update includes one new feature (neural-network based collective variables), several small enhancements (including an automatic definition of grid boundaries for angle-based CVs, and a normalization option for eigenvector-based CVs), bugfixes and documentation improvements. Usage information for specific features included in the Colvars library (i.e. not just the library as a whole) is now also reported to the screen or LAMMPS logfile (as is done already in other LAMMPS classes). Notable to LAMMPS code development are the removals of duplicated code and of ambiguously-named preprocessor defines in the Colvars headers. Since the last PR, the existing regression tests have also been running automatically via GitHub Actions.

The following pull requests in the Colvars repository are relevant to LAMMPS:

- 475 Remove fatal error condition https://github.com/Colvars/colvars/pull/475 (@jhenin, @giacomofiorin)
- 474 Allow normalizing eigenvector vector components to deal with unit change https://github.com/Colvars/colvars/pull/474 (@giacomofiorin, @jhenin)
- 470 Better error handling in the initialization of NeuralNetwork CV https://github.com/Colvars/colvars/pull/470 (@HanatoK)
- 468 Add examples of histogram configuration, with and without explicit grid parameters https://github.com/Colvars/colvars/pull/468 (@giacomofiorin)
- 464 Fix #463 using more fine-grained features https://github.com/Colvars/colvars/pull/464 (@jhenin, @giacomofiorin)
- 447 [RFC] New option "scaledBiasingForce" for colvarbias https://github.com/Colvars/colvars/pull/447 (@HanatoK, @jhenin)
- 444 [RFC] Implementation of dense neural network as CV https://github.com/Colvars/colvars/pull/444 (@HanatoK, @giacomofiorin, @jhenin)
- 443 Fix explicit gradient dependency of sub-CVs https://github.com/Colvars/colvars/pull/443 (@HanatoK, @jhenin)
- 442 Persistent bias count https://github.com/Colvars/colvars/pull/442 (@jhenin, @giacomofiorin)
- 437 Return type of bias from scripting interface https://github.com/Colvars/colvars/pull/437 (@giacomofiorin)
- 434 More flexible use of boundaries from colvars by grids https://github.com/Colvars/colvars/pull/434 (@jhenin)
- 433 Prevent double-free in linearCombination https://github.com/Colvars/colvars/pull/433 (@HanatoK)
- 428 More complete documentation for index file format (NDX) https://github.com/Colvars/colvars/pull/428 (@giacomofiorin)
- 426 Integrate functional version of backup_file() into base proxy class https://github.com/Colvars/colvars/pull/426 (@giacomofiorin)
- 424 Track CVC inheritance when documenting feature usage https://github.com/Colvars/colvars/pull/424 (@giacomofiorin)
- 419 Generate citation report while running computations https://github.com/Colvars/colvars/pull/419 (@giacomofiorin, @jhenin)
- 415 Rebin metadynamics bias from explicit hills when available https://github.com/Colvars/colvars/pull/415 (@giacomofiorin)
- 312 Ignore a keyword if it has content to the left of it (regardless of braces) https://github.com/Colvars/colvars/pull/312 (@giacomofiorin)

Authors: @giacomofiorin, @HanatoK, @jhenin
#if (__cplusplus >= 201103L)

#include "colvar_neuralnetworkcompute.h"
#include "colvarparse.h"

namespace neuralnetworkCV {
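// Built-in activation functions: each map entry pairs an activation with its
// analytic first derivative, keyed by the activation's name.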
std::map<std::string, std::pair<std::function<double(double)>, std::function<double(double)>>> activation_function_map
{
    {"tanh",     {[](double x){return std::tanh(x);},
                  [](double x){return 1.0 - std::tanh(x) * std::tanh(x);}}},
    {"sigmoid",  {[](double x){return 1.0 / (1.0 + std::exp(-x));},
                  [](double x){return std::exp(-x) / ((1.0 + std::exp(-x)) * (1.0 + std::exp(-x)));}}},
    {"linear",   {[](double x){return x;},
                  [](double /*x*/){return 1.0;}}},
    {"relu",     {[](double x){return x < 0. ? 0. : x;},
                  [](double x){return x < 0. ? 0. : 1.;}}},
    {"lrelu100", {[](double x){return x < 0. ? 0.01 * x : x;},
                  [](double x){return x < 0. ? 0.01 : 1.;}}},
    {"elu",      {[](double x){return x < 0. ? std::exp(x)-1. : x;},
                  [](double x){return x < 0. ? std::exp(x) : 1.;}}}
};
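// When built with the LEPTON preprocessor define, an arbitrary activation
// function can be supplied as a string expression in the variable "x";
// Lepton compiles both the expression and its analytic derivative.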
#ifdef LEPTON
customActivationFunction::customActivationFunction():
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {}

customActivationFunction::customActivationFunction(const std::string& expression_string):
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {
    setExpression(expression_string);
}

customActivationFunction::customActivationFunction(const customActivationFunction& source):
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {
    // check if the source object is initialized
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    }
}

customActivationFunction& customActivationFunction::operator=(const customActivationFunction& source) {
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    } else {
        expression = std::string();
        value_evaluator = nullptr;
        gradient_evaluator = nullptr;
        input_reference = nullptr;
        derivative_reference = nullptr;
    }
    return *this;
}
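// Parse and compile the expression (in the variable "x"), build a second
// compiled expression for its derivative, and cache references to the input
// variable of both evaluators.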
void customActivationFunction::setExpression(const std::string& expression_string) {
    expression = expression_string;
    Lepton::ParsedExpression parsed_expression;
    // the variable must be "x" for the input of an activation function
    const std::string activation_input_variable{"x"};
    // parse the expression
    try {
        parsed_expression = Lepton::Parser::parse(expression);
    } catch (...) {
        cvm::error("Error parsing or compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // compile the expression
    try {
        value_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.createCompiledExpression()));
    } catch (...) {
        cvm::error("Error compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // create a compiled expression for the derivative
    try {
        gradient_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.differentiate(activation_input_variable).createCompiledExpression()));
    } catch (...) {
        cvm::error("Error creating compiled expression for variable \"" + activation_input_variable + "\".\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled expression
    try {
        input_reference = &(value_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled expression.\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled derivative expression
    try {
        derivative_reference = &(gradient_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled derivative expression.\n", COLVARS_INPUT_ERROR);
    }
}
std::string customActivationFunction::getExpression() const {
    return expression;
}
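// Evaluate the activation or its derivative by writing x into the cached
// variable reference and re-running the corresponding compiled expression.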
double customActivationFunction::evaluate(double x) const {
    *input_reference = x;
    return value_evaluator->evaluate();
}

double customActivationFunction::derivative(double x) const {
    *derivative_reference = x;
    return gradient_evaluator->evaluate();
}
#endif
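// A dense (fully connected) layer: output[i] = f(sum_j W[i][j] * input[j] + b[i]),
// with the weights W and biases b read from plain-text files.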
denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::function<double(double)>& f, const std::function<double(double)>& df): m_activation_function(f), m_activation_function_derivative(df) {
#ifdef LEPTON
    m_use_custom_activation = false;
#endif
    readFromFile(weights_file, biases_file);
}

#ifdef LEPTON
denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::string& custom_activation_expression) {
    m_use_custom_activation = true;
    m_custom_activation_function = customActivationFunction(custom_activation_expression);
    readFromFile(weights_file, biases_file);
}
#endif
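// Weights file: one row of the weight matrix per line (space-separated values).
// Biases file: one bias per line (only the first token of each line is read).
// Empty lines are skipped; the layer sizes are deduced from the weight matrix.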
void denseLayer::readFromFile(const std::string& weights_file, const std::string& biases_file) {
    // parse weights file
    m_weights.clear();
    m_biases.clear();
    std::string line;
    std::ifstream ifs_weights(weights_file.c_str());
    if (!ifs_weights) {
        throw std::runtime_error("Cannot open file " + weights_file);
    }
    while (std::getline(ifs_weights, line)) {
        if (ifs_weights.bad()) {
            throw std::runtime_error("I/O error while reading " + weights_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            std::vector<double> weights_tmp(splitted_data.size());
            for (size_t i = 0; i < splitted_data.size(); ++i) {
                try {
                    weights_tmp[i] = std::stod(splitted_data[i]);
                } catch (...) {
                    throw std::runtime_error("Cannot convert " + splitted_data[i] + " to a number while reading file " + weights_file);
                }
            }
            m_weights.push_back(weights_tmp);
        }
    }
    // parse biases file
    std::ifstream ifs_biases(biases_file.c_str());
    if (!ifs_biases) {
        throw std::runtime_error("Cannot open file " + biases_file);
    }
    while (std::getline(ifs_biases, line)) {
        if (ifs_biases.bad()) {
            throw std::runtime_error("I/O error while reading " + biases_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            double bias = 0;
            try {
                bias = std::stod(splitted_data[0]);
            } catch (...) {
                throw std::runtime_error("Cannot convert " + splitted_data[0] + " to a number while reading file " + biases_file);
            }
            m_biases.push_back(bias);
        }
    }
    m_input_size = m_weights[0].size();
    m_output_size = m_weights.size();
}
void denseLayer::setActivationFunction(const std::function<double(double)>& f, const std::function<double(double)>& df) {
    m_activation_function = f;
    m_activation_function_derivative = df;
}
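// Forward pass of the layer; the output vector must already be sized to
// m_output_size by the caller.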
void denseLayer::compute(const std::vector<double>& input, std::vector<double>& output) const {
    for (size_t i = 0; i < m_output_size; ++i) {
        output[i] = 0;
        for (size_t j = 0; j < m_input_size; ++j) {
            output[i] += input[j] * m_weights[i][j];
        }
        output[i] += m_biases[i];
#ifdef LEPTON
        if (m_use_custom_activation) {
            output[i] = m_custom_activation_function.evaluate(output[i]);
        } else {
#endif
            output[i] = m_activation_function(output[i]);
#ifdef LEPTON
        }
#endif
    }
}
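// Element (i, j) of the layer Jacobian:
// d output[i] / d input[j] = f'(sum_k W[i][k] * input[k] + b[i]) * W[i][j].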
double denseLayer::computeGradientElement(const std::vector<double>& input, const size_t i, const size_t j) const {
    double sum_with_bias = 0;
    for (size_t j_in = 0; j_in < m_input_size; ++j_in) {
        sum_with_bias += input[j_in] * m_weights[i][j_in];
    }
    sum_with_bias += m_biases[i];
#ifdef LEPTON
    if (m_use_custom_activation) {
        const double grad_ij = m_custom_activation_function.derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
    } else {
#endif
        const double grad_ij = m_activation_function_derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
#ifdef LEPTON
    }
#endif
}
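// Full Jacobian of the layer with respect to its input
// (m_output_size rows by m_input_size columns).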
void denseLayer::computeGradient(const std::vector<double>& input, std::vector<std::vector<double>>& output_grad) const {
    for (size_t j = 0; j < m_input_size; ++j) {
        for (size_t i = 0; i < m_output_size; ++i) {
            output_grad[i][j] = computeGradientElement(input, i, j);
        }
    }
}
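// The network stores its layers and pre-allocates one output vector and one
// Jacobian buffer per layer.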
neuralNetworkCompute::neuralNetworkCompute(const std::vector<denseLayer>& dense_layers): m_dense_layers(dense_layers) {
    m_layers_output.resize(m_dense_layers.size());
    m_grads_tmp.resize(m_dense_layers.size());
    for (size_t i_layer = 0; i_layer < m_layers_output.size(); ++i_layer) {
        m_layers_output[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), 0);
        m_grads_tmp[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), std::vector<double>(m_dense_layers[i_layer].getInputSize(), 0));
    }
}
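// Append a layer, but only if its input size matches the output size of the
// last layer already present; otherwise return false and add nothing.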
bool neuralNetworkCompute::addDenseLayer(const denseLayer& layer) {
    if (m_dense_layers.empty()) {
        // add layer to this ann directly if m_dense_layers is empty
        m_dense_layers.push_back(layer);
        m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
        m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
        return true;
    } else {
        // otherwise, we need to check if the output of last layer in m_dense_layers matches the input of layer to be added
        if (m_dense_layers.back().getOutputSize() == layer.getInputSize()) {
            m_dense_layers.push_back(layer);
            m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
            m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
            return true;
        } else {
            return false;
        }
    }
}
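// Naive dense matrix product C = A * B, used to chain per-layer Jacobians;
// a dimension mismatch is only reported to std::cerr.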
std::vector<std::vector<double>> neuralNetworkCompute::multiply_matrix(const std::vector<std::vector<double>>& A, const std::vector<std::vector<double>>& B) {
    const size_t m = A.size();
    const size_t n = B.size();
    if (A[0].size() != n) {
        std::cerr << "Error on multiplying matrices!\n";
    }
    const size_t t = B[0].size();
    std::vector<std::vector<double>> C(m, std::vector<double>(t, 0.0));
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < t; ++j) {
            for (size_t k = 0; k < n; ++k) {
                C[i][j] += A[i][k] * B[k][j];
            }
        }
    }
    return C;
}
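// Full forward and backward pass: run every layer on the current input,
// compute each layer's Jacobian, then chain the Jacobians
// (J_total = J_n * ... * J_2 * J_1) to obtain the gradient of the network
// output with respect to its input.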
void neuralNetworkCompute::compute() {
    if (m_dense_layers.empty()) {
        return;
    }
    size_t i_layer;
    m_dense_layers[0].compute(m_input, m_layers_output[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].compute(m_layers_output[i_layer - 1], m_layers_output[i_layer]);
    }
    // gradients of each layer
    m_dense_layers[0].computeGradient(m_input, m_grads_tmp[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].computeGradient(m_layers_output[i_layer - 1], m_grads_tmp[i_layer]);
    }
    // chain rule
    if (m_dense_layers.size() > 1) {
        m_chained_grad = multiply_matrix(m_grads_tmp[1], m_grads_tmp[0]);
        for (i_layer = 2; i_layer < m_dense_layers.size(); ++i_layer) {
            m_chained_grad = multiply_matrix(m_grads_tmp[i_layer], m_chained_grad);
        }
    } else {
        m_chained_grad = m_grads_tmp[0];
    }
}

} // namespace neuralnetworkCV

#endif