Files
lammps-gran-kokkos/lib/colvars/colvar_neuralnetworkcompute.cpp
2024-08-06 01:07:43 +02:00

311 lines
12 KiB
C++

// -*- Mode:c++; c-basic-offset: 4; -*-
// This file is part of the Collective Variables module (Colvars).
// The original version of Colvars and its updates are located at:
// https://github.com/Colvars/colvars
// Please update all Colvars source files before making any changes.
// If you wish to distribute your changes, please submit them to the
// Colvars repository at GitHub.
#include <iostream>
#include <fstream>
#include "colvar_neuralnetworkcompute.h"
#include "colvarparse.h"
#include "colvarproxy.h"
namespace neuralnetworkCV {
/// Registry of the built-in activation functions, keyed by name.
/// Each entry holds a pair {f, f'}: the activation function itself (first)
/// and its first derivative with respect to the input (second).
std::map<std::string, std::pair<std::function<double(double)>, std::function<double(double)>>> activation_function_map
{
    {"tanh",     {[](double x){return std::tanh(x);},
                  // evaluate tanh only once for the derivative 1 - tanh^2(x)
                  [](double x){const double t = std::tanh(x); return 1.0 - t * t;}}},
    {"sigmoid",  {[](double x){return 1.0 / (1.0 + std::exp(-x));},
                  // The derivative e^{-x} / (1 + e^{-x})^2 is an even function of x,
                  // so it is evaluated with e^{-|x|}: the exponential then never
                  // overflows, which previously produced inf/inf = NaN for very
                  // negative inputs.
                  [](double x){const double e = std::exp(-std::fabs(x)); return e / ((1.0 + e) * (1.0 + e));}}},
    {"linear",   {[](double x){return x;},
                  [](double /*x*/){return 1.0;}}},
    {"relu",     {[](double x){return x < 0. ? 0. : x;},
                  [](double x){return x < 0. ? 0. : 1.;}}},
    // leaky ReLU with a slope of 1/100 on the negative side
    {"lrelu100", {[](double x){return x < 0. ? 0.01 * x : x;},
                  [](double x){return x < 0. ? 0.01 : 1.;}}},
    {"elu",      {[](double x){return x < 0. ? std::exp(x)-1. : x;},
                  [](double x){return x < 0. ? std::exp(x) : 1.;}}}
};
#ifdef LEPTON
customActivationFunction::customActivationFunction():
expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
input_reference(nullptr), derivative_reference(nullptr) {}
customActivationFunction::customActivationFunction(const std::string& expression_string):
expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
input_reference(nullptr), derivative_reference(nullptr) {
setExpression(expression_string);
}
customActivationFunction::customActivationFunction(const customActivationFunction& source):
expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
input_reference(nullptr), derivative_reference(nullptr) {
// check if the source object is initialized
if (source.value_evaluator != nullptr) {
this->setExpression(source.expression);
}
}
// Copy assignment: an initialized source has its expression compiled again
// for this object; an uninitialized source resets this object to the empty
// state.
customActivationFunction& customActivationFunction::operator=(const customActivationFunction& source) {
    if (source.value_evaluator == nullptr) {
        // source was never initialized: drop everything held by this object
        derivative_reference = nullptr;
        input_reference = nullptr;
        gradient_evaluator.reset();
        value_evaluator.reset();
        expression.clear();
        return *this;
    }
    setExpression(source.expression);
    return *this;
}
void customActivationFunction::setExpression(const std::string& expression_string) {
expression = expression_string;
Lepton::ParsedExpression parsed_expression;
// the variable must be "x" for the input of an activation function
const std::string activation_input_variable{"x"};
// parse the expression
try {
parsed_expression = Lepton::Parser::parse(expression);
} catch (...) {
cvm::error("Error parsing or compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
}
// compile the expression
try {
value_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.createCompiledExpression()));
} catch (...) {
cvm::error("Error compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
}
// create a compiled expression for the derivative
try {
gradient_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.differentiate(activation_input_variable).createCompiledExpression()));
} catch (...) {
cvm::error("Error creating compiled expression for variable \"" + activation_input_variable + "\".\n", COLVARS_INPUT_ERROR);
}
// get the reference to the input variable in the compiled expression
try {
input_reference = &(value_evaluator->getVariableReference(activation_input_variable));
} catch (...) {
cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled expression.\n", COLVARS_INPUT_ERROR);
}
// get the reference to the input variable in the compiled derivative expression
try {
derivative_reference = &(gradient_evaluator->getVariableReference(activation_input_variable));
} catch (...) {
cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled derivative exprssion.\n", COLVARS_INPUT_ERROR);
}
}
std::string customActivationFunction::getExpression() const {
return expression;
}
// Evaluate the activation function at x: write x into the variable slot of
// the compiled value expression, then evaluate that expression.
// The object must have been initialized by a successful setExpression().
double customActivationFunction::evaluate(double x) const {
    double& input_slot = *input_reference;
    input_slot = x;
    const double value = value_evaluator->evaluate();
    return value;
}
// Evaluate the derivative of the activation function at x: write x into the
// variable slot of the compiled derivative expression, then evaluate it.
// The object must have been initialized by a successful setExpression().
double customActivationFunction::derivative(double x) const {
    double& input_slot = *derivative_reference;
    input_slot = x;
    const double slope = gradient_evaluator->evaluate();
    return slope;
}
#endif
// Build a dense layer that uses a pair of callables as activation function
// (f) and activation derivative (df); weights and biases are loaded from the
// two given files.
denseLayer::denseLayer(const std::string& weights_file,
                       const std::string& biases_file,
                       const std::function<double(double)>& f,
                       const std::function<double(double)>& df)
    : m_activation_function(f),
      m_activation_function_derivative(df)
{
#ifdef LEPTON
    // this layer is driven by the std::function pair, not by a Lepton expression
    m_use_custom_activation = false;
#endif
    this->readFromFile(weights_file, biases_file);
}
#ifdef LEPTON
// Build a dense layer whose activation function is given as a custom
// expression string; weights and biases are loaded from the two given files.
//
// The custom activation function is constructed directly in the member
// initializer list. Default-constructing it and then assigning a temporary
// would parse and compile the same expression twice (once for the temporary
// and once more in the copy assignment).
denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::string& custom_activation_expression)
    : m_custom_activation_function(custom_activation_expression)
{
    m_use_custom_activation = true;
    readFromFile(weights_file, biases_file);
}
#endif
// Load the weights and the biases of this layer from two text files.
//
// Weights file: every non-empty line is one row of the weight matrix (the
// weights of one output neuron), with numbers separated by spaces.
// Biases file: the first number of every non-empty line is one bias.
//
// Afterwards m_input_size is the number of columns of the first weights row
// and m_output_size is the number of rows.
//
// Throws std::runtime_error if a token cannot be converted to a number, on a
// stream I/O error, if no weights were read, if a weights row is shorter
// than the first one, or if there are fewer biases than weights rows (the
// last three would otherwise lead to out-of-bounds accesses later on).
// Input streams are closed before an exception leaves this function.
void denseLayer::readFromFile(const std::string& weights_file, const std::string& biases_file) {
    m_weights.clear();
    m_biases.clear();
    std::string line;
    colvarproxy *proxy = cvm::main()->proxy;
    // parse weights file
    auto &ifs_weights = proxy->input_stream(weights_file, "weights file");
    try {
        while (std::getline(ifs_weights, line)) {
            std::vector<std::string> splitted_data;
            colvarparse::split_string(line, std::string{" "}, splitted_data);
            if (splitted_data.empty()) {
                continue;
            }
            std::vector<double> weights_tmp(splitted_data.size());
            for (size_t i = 0; i < splitted_data.size(); ++i) {
                try {
                    weights_tmp[i] = std::stod(splitted_data[i]);
                } catch (...) {
                    throw std::runtime_error("Cannot convert " + splitted_data[i] + " to a number while reading file " + weights_file);
                }
            }
            m_weights.push_back(weights_tmp);
        }
        // A loop that stops at end-of-file leaves failbit set, so only badbit
        // signals a genuine I/O error here.
        if (ifs_weights.bad()) {
            throw std::runtime_error("I/O error while reading " + weights_file);
        }
    } catch (...) {
        proxy->close_input_stream(weights_file);
        throw;
    }
    proxy->close_input_stream(weights_file);
    // parse biases file
    auto &ifs_biases = proxy->input_stream(biases_file, "biases file");
    try {
        while (std::getline(ifs_biases, line)) {
            std::vector<std::string> splitted_data;
            colvarparse::split_string(line, std::string{" "}, splitted_data);
            if (splitted_data.empty()) {
                continue;
            }
            double bias = 0;
            try {
                bias = std::stod(splitted_data[0]);
            } catch (...) {
                throw std::runtime_error("Cannot convert " + splitted_data[0] + " to a number while reading file " + biases_file);
            }
            m_biases.push_back(bias);
        }
        if (ifs_biases.bad()) {
            throw std::runtime_error("I/O error while reading " + biases_file);
        }
    } catch (...) {
        proxy->close_input_stream(biases_file);
        throw;
    }
    proxy->close_input_stream(biases_file);
    // validate the shape of what was read before publishing the sizes
    if (m_weights.empty()) {
        throw std::runtime_error("No weights were read from file " + weights_file);
    }
    const size_t num_columns = m_weights[0].size();
    for (size_t i = 1; i < m_weights.size(); ++i) {
        if (m_weights[i].size() < num_columns) {
            throw std::runtime_error("Row " + std::to_string(i + 1) + " of file " + weights_file + " has fewer columns than the first row");
        }
    }
    if (m_biases.size() < m_weights.size()) {
        throw std::runtime_error("File " + biases_file + " contains fewer biases than the number of rows in file " + weights_file);
    }
    m_input_size = num_columns;
    m_output_size = m_weights.size();
}
// Replace the activation function (f) and its derivative (df) of this layer.
//
// When built with LEPTON, a layer created from a custom expression has
// m_use_custom_activation set, and compute()/computeGradientElement() then
// ignore the std::function pair. The flag is cleared here so that the
// functions passed in actually take effect.
void denseLayer::setActivationFunction(const std::function<double(double)>& f, const std::function<double(double)>& df) {
    m_activation_function = f;
    m_activation_function_derivative = df;
#ifdef LEPTON
    m_use_custom_activation = false;
#endif
}
// Forward pass of the layer: for every output neuron, take the weighted sum
// of the inputs, add the bias and apply the activation function.
// `output` must already hold at least m_output_size elements and `input` at
// least m_input_size elements; no resizing is done here.
void denseLayer::compute(const std::vector<double>& input, std::vector<double>& output) const {
    for (size_t row = 0; row < m_output_size; ++row) {
        double& neuron = output[row];
        const std::vector<double>& weights_row = m_weights[row];
        // weighted sum of the inputs, accumulated in place
        neuron = 0;
        for (size_t col = 0; col < m_input_size; ++col) {
            neuron += input[col] * weights_row[col];
        }
        neuron += m_biases[row];
#ifdef LEPTON
        if (m_use_custom_activation) {
            neuron = m_custom_activation_function.evaluate(neuron);
            continue;
        }
#endif
        neuron = m_activation_function(neuron);
    }
}
// Derivative of output neuron i with respect to input j:
//   f'(sum_k input[k] * w[i][k] + b[i]) * w[i][j]
// where f' is the derivative of the activation function of this layer.
double denseLayer::computeGradientElement(const std::vector<double>& input, const size_t i, const size_t j) const {
    const std::vector<double>& weights_row = m_weights[i];
    // pre-activation value of neuron i
    double pre_activation = 0;
    for (size_t k = 0; k < m_input_size; ++k) {
        pre_activation += input[k] * weights_row[k];
    }
    pre_activation += m_biases[i];
#ifdef LEPTON
    if (m_use_custom_activation) {
        return m_custom_activation_function.derivative(pre_activation) * weights_row[j];
    }
#endif
    return m_activation_function_derivative(pre_activation) * weights_row[j];
}
void denseLayer::computeGradient(const std::vector<double>& input, std::vector<std::vector<double>>& output_grad) const {
for (size_t j = 0; j < m_input_size; ++j) {
for (size_t i = 0; i < m_output_size; ++i) {
output_grad[i][j] = computeGradientElement(input, i, j);
}
}
}
// Build a network from a list of dense layers and allocate, for every layer,
// a zero-filled output buffer and a zero-filled (outputs x inputs) gradient
// buffer. Compatibility between consecutive layers is not checked here.
neuralNetworkCompute::neuralNetworkCompute(const std::vector<denseLayer>& dense_layers)
    : m_dense_layers(dense_layers)
{
    const size_t num_layers = m_dense_layers.size();
    m_layers_output.resize(num_layers);
    m_grads_tmp.resize(num_layers);
    for (size_t k = 0; k < num_layers; ++k) {
        const auto num_outputs = m_dense_layers[k].getOutputSize();
        const auto num_inputs = m_dense_layers[k].getInputSize();
        m_layers_output[k].assign(num_outputs, 0);
        m_grads_tmp[k].assign(num_outputs, std::vector<double>(num_inputs, 0));
    }
}
// Append a dense layer to the network together with its output and gradient
// buffers. The first layer is always accepted; any later layer is accepted
// only if its input size equals the output size of the current last layer.
// Returns true if the layer was added, false if it was rejected.
bool neuralNetworkCompute::addDenseLayer(const denseLayer& layer) {
    const bool has_previous_layer = !m_dense_layers.empty();
    if (has_previous_layer && !(m_dense_layers.back().getOutputSize() == layer.getInputSize())) {
        // the new layer cannot consume the output of the last layer
        return false;
    }
    m_dense_layers.push_back(layer);
    std::vector<double> output_buffer(layer.getOutputSize());
    m_layers_output.push_back(output_buffer);
    std::vector<std::vector<double>> gradient_buffer(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0));
    m_grads_tmp.push_back(gradient_buffer);
    return true;
}
// Dense matrix product C = A * B for matrices stored as vectors of rows.
//
// A is m x n and B is n x t; the result is m x t. The shapes are taken from
// A.size(), B.size() and the length of the first row of each matrix.
//
// Empty operands are handled without touching A[0] or B[0]. If the number
// of columns of A does not match the number of rows of B, the error is
// reported on std::cerr (as before) and a zero-filled m x t matrix is
// returned rather than indexing the operands with inconsistent bounds.
std::vector<std::vector<double>> neuralNetworkCompute::multiply_matrix(const std::vector<std::vector<double>>& A, const std::vector<std::vector<double>>& B) {
    const size_t m = A.size();
    const size_t n = B.size();
    const size_t t = B.empty() ? 0 : B[0].size();
    std::vector<std::vector<double>> C(m, std::vector<double>(t, 0.0));
    if (m == 0) {
        return C;
    }
    if (A[0].size() != n) {
        std::cerr << "Error on multiplying matrices!\n";
        return C;
    }
    // i-k-j loop order: the innermost loop walks rows of B and C contiguously
    for (size_t i = 0; i < m; ++i) {
        auto& C_i = C[i];
        for (size_t k = 0; k < n; ++k) {
            const auto tmp = A[i][k];
            const auto& B_k = B[k];
            for (size_t j = 0; j < t; ++j) {
                C_i[j] += tmp * B_k[j];
            }
        }
    }
    return C;
}
void neuralNetworkCompute::compute() {
if (m_dense_layers.empty()) {
return;
}
size_t i_layer;
m_dense_layers[0].compute(m_input, m_layers_output[0]);
for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
m_dense_layers[i_layer].compute(m_layers_output[i_layer - 1], m_layers_output[i_layer]);
}
// gradients of each layer
m_dense_layers[0].computeGradient(m_input, m_grads_tmp[0]);
for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
m_dense_layers[i_layer].computeGradient(m_layers_output[i_layer - 1], m_grads_tmp[i_layer]);
}
// chain rule
if (m_dense_layers.size() > 1) {
m_chained_grad = multiply_matrix(m_grads_tmp[1], m_grads_tmp[0]);
for (i_layer = 2; i_layer < m_dense_layers.size(); ++i_layer) {
m_chained_grad = multiply_matrix(m_grads_tmp[i_layer], m_chained_grad);
}
} else {
m_chained_grad = m_grads_tmp[0];
}
}
}