Merge remote-tracking branch 'lammps/develop' into electrode

This commit is contained in:
Ludwig Ahrens
2022-04-26 10:05:30 +02:00
2174 changed files with 27865 additions and 46669 deletions

View File

@ -775,7 +775,7 @@ namespace ATC {
//--------------------------------------------------
/** allow FE_Engine to construct data manager after mesh is constructed */
void ATC_Coupling::construct_prescribed_data_manager (void) {
void ATC_Coupling::construct_prescribed_data_manager () {
prescribedDataMgr_ = new PrescribedDataManager(feEngine_,fieldSizes_);
}
@ -1704,7 +1704,7 @@ namespace ATC {
//--------------------------------------------------------------
/** method to trigger construction of mesh data after mesh construction */
//--------------------------------------------------------------
void ATC_Coupling::initialize_mesh_data(void)
void ATC_Coupling::initialize_mesh_data()
{
int nelts = feEngine_->fe_mesh()->num_elements();
elementToMaterialMap_.reset(nelts);
@ -1715,7 +1715,7 @@ namespace ATC {
}
//--------------------------------------------------------
void ATC_Coupling::reset_flux_mask(void)
void ATC_Coupling::reset_flux_mask()
{
int i;
// this is exact only for uniform meshes and certain types of atomic weights

View File

@ -357,7 +357,7 @@ namespace ATC {
// compute_scalar : added energy
// this is used in the line search
//--------------------------------------------------------------------
double ATC_CouplingMomentum::compute_scalar(void)
double ATC_CouplingMomentum::compute_scalar()
{
double energy = extrinsicModelManager_.compute_scalar();
return energy;

View File

@ -327,7 +327,7 @@ namespace ATC {
//--------------------------------------------------------------------
// compute_scalar : added energy
//--------------------------------------------------------------------
double ATC_CouplingMomentumEnergy::compute_scalar(void)
double ATC_CouplingMomentumEnergy::compute_scalar()
{
double energy = 0.0;
energy += extrinsicModelManager_.compute_scalar();
@ -337,7 +337,7 @@ namespace ATC {
//--------------------------------------------------------------------
// total kinetic energy
//--------------------------------------------------------------------
double ATC_CouplingMomentumEnergy::kinetic_energy(void)
double ATC_CouplingMomentumEnergy::kinetic_energy()
{
const MATRIX & M = massMats_[VELOCITY].quantity();
@ -355,7 +355,7 @@ namespace ATC {
//--------------------------------------------------------------------
// total potential energy
//--------------------------------------------------------------------
double ATC_CouplingMomentumEnergy::potential_energy(void)
double ATC_CouplingMomentumEnergy::potential_energy()
{
Array<FieldName> mask(1);
mask(0) = VELOCITY;

View File

@ -3,6 +3,7 @@
#ifndef ATC_ERROR
#define ATC_ERROR
#include <exception>
#include <string>
// the following two convert __LINE__ to a string
@ -23,7 +24,7 @@ namespace ATC {
* @brief Base class for throwing run-time errors with descriptions
*/
class ATC_Error {
class ATC_Error : public std::exception {
public:
// constructor
@ -31,17 +32,21 @@ class ATC_Error {
{
errorDescription_ = "ERROR: " + errorDescription;
ERROR_FOR_BACKTRACE
};
}
ATC_Error(std::string location, std::string errorDescription)
{
errorDescription_ = "ERROR: " + location + ": "+ errorDescription;
ERROR_FOR_BACKTRACE
};
}
std::string error_description() {
return errorDescription_;
};
}
const char *what() const noexcept override {
return errorDescription_.c_str();
}
private:
// string describing the type of error

View File

@ -1674,7 +1674,7 @@ pecified
}
//-------------------------------------------------------------------
void ATC_Method::set_reference_potential_energy(void)
void ATC_Method::set_reference_potential_energy()
{
if (setRefPE_) {
if (setRefPEvalue_) {
@ -2170,7 +2170,7 @@ pecified
// }
}
//--------------------------------------------------------
void ATC_Method::compute_nodeset_output(void)
void ATC_Method::compute_nodeset_output()
{
map< pair <string, FieldName>, NodesetOperationType >::const_iterator iter;
for (iter = nsetData_.begin(); iter != nsetData_.end();iter++){
@ -2194,7 +2194,7 @@ pecified
}
}
//--------------------------------------------------------
void ATC_Method::compute_faceset_output(void)
void ATC_Method::compute_faceset_output()
{
map < pair<string,string>, FacesetIntegralType >::const_iterator iter;
DENS_MAT values;
@ -2223,7 +2223,7 @@ pecified
}
}
//--------------------------------------------------------
void ATC_Method::compute_elementset_output(void)
void ATC_Method::compute_elementset_output()
{
map< pair <string, FieldName>, ElementsetOperationType >::const_iterator iter;
for (iter = esetData_.begin(); iter != esetData_.end();iter++){
@ -2379,7 +2379,7 @@ pecified
}
//--------------------------------------------------------
void ATC_Method::remap_ghost_ref_positions(void)
void ATC_Method::remap_ghost_ref_positions()
{
int nlocal = lammpsInterface_->nlocal();

View File

@ -916,12 +916,12 @@ namespace ATC {
}
//-------------------------------------------------------------------
void ATC_Transfer::compute_bond_matrix(void)
void ATC_Transfer::compute_bond_matrix()
{
bondMatrix_->reset();
}
//-------------------------------------------------------------------
void ATC_Transfer::compute_fields(void)
void ATC_Transfer::compute_fields()
{
// keep per-atom computes fresh. JAZ and REJ not sure why;

View File

@ -83,7 +83,7 @@ using ATC_Utility::to_string;
}
//-------------------------------------------------------------------
void ATC_TransferKernel::compute_kernel_matrix_molecule(void) // KKM add
void ATC_TransferKernel::compute_kernel_matrix_molecule() // KKM add
{
int nLocalMol = smallMoleculeSet_->local_molecule_count();
if (nLocal_>0) {

View File

@ -688,7 +688,7 @@ namespace ATC {
// compute_sparsity
// - creates sparsity template
//--------------------------------------------------------
void RegulatorShapeFunction::compute_sparsity(void)
void RegulatorShapeFunction::compute_sparsity()
{
// first get local pattern from N N^T

View File

@ -198,7 +198,7 @@ namespace ATC {
// nomenclature might be a bit backward: control --> nodes that exert the control, & influence --> atoms that feel the influence
void ChargeRegulatorMethod::initialize(void)
void ChargeRegulatorMethod::initialize()
{
interscaleManager_ = &(atc_->interscale_manager());
@ -220,7 +220,7 @@ namespace ATC {
int ChargeRegulatorMethod::nlocal() { return atc_->nlocal(); }
void ChargeRegulatorMethod::set_greens_functions(void)
void ChargeRegulatorMethod::set_greens_functions()
{
// set up Green's function per node
for (int i = 0; i < nNodes_; i++) {
@ -272,7 +272,7 @@ namespace ATC {
//--------------------------------------------------------
// Initialize
//--------------------------------------------------------
void ChargeRegulatorMethodFeedback::initialize(void)
void ChargeRegulatorMethodFeedback::initialize()
{
ChargeRegulatorMethod::initialize();
if (surfaceType_ != ChargeRegulator::CONDUCTOR)
@ -284,7 +284,7 @@ namespace ATC {
//--------------------------------------------------------
// Construct transfers
//--------------------------------------------------------
void ChargeRegulatorMethodFeedback::construct_transfers(void)
void ChargeRegulatorMethodFeedback::construct_transfers()
{
ChargeRegulatorMethod::construct_transfers();
@ -301,7 +301,7 @@ namespace ATC {
//--------------------------------------------------------
// find measurement atoms and nodes
//--------------------------------------------------------
void ChargeRegulatorMethodFeedback::set_influence(void)
void ChargeRegulatorMethodFeedback::set_influence()
{
// get nodes that overlap influence atoms & compact list of influence atoms
@ -321,7 +321,7 @@ namespace ATC {
//--------------------------------------------------------
// construct a Green's submatrix
//--------------------------------------------------------
void ChargeRegulatorMethodFeedback::set_influence_matrix(void)
void ChargeRegulatorMethodFeedback::set_influence_matrix()
{
// construct control-influence matrix bar{G}^-1: ds{p} = G{p,m}^-1 dphi{m}
@ -434,7 +434,7 @@ namespace ATC {
//--------------------------------------------------------
// Initialize
//--------------------------------------------------------
void ChargeRegulatorMethodImageCharge::initialize(void)
void ChargeRegulatorMethodImageCharge::initialize()
{
ChargeRegulatorMethod::initialize();
if (surfaceType_ != ChargeRegulator::DIELECTRIC) throw ATC_Error("currently image charge can only mimic a dielectric");

View File

@ -224,7 +224,7 @@ const double kMinScale_ = 10000.;
//--------------------------------------------------------
// Initialize
//--------------------------------------------------------
void ConcentrationRegulatorMethodTransition::initialize(void)
void ConcentrationRegulatorMethodTransition::initialize()
{
#ifdef ATC_VERBOSE
lammpsInterface_->print_msg_once(
@ -289,7 +289,7 @@ const double kMinScale_ = 10000.;
//--------------------------------------------------------
// pre exchange
//--------------------------------------------------------
void ConcentrationRegulatorMethodTransition::pre_exchange(void)
void ConcentrationRegulatorMethodTransition::pre_exchange()
{
// return if should not be called on this timestep
if ( ! lammpsInterface_->now(frequency_)) return;
@ -312,7 +312,7 @@ const double kMinScale_ = 10000.;
//--------------------------------------------------------
// pre force
//--------------------------------------------------------
void ConcentrationRegulatorMethodTransition::pre_force(void)
void ConcentrationRegulatorMethodTransition::pre_force()
{
transition();
}
@ -353,7 +353,7 @@ const double kMinScale_ = 10000.;
//--------------------------------------------------------
// excess
//--------------------------------------------------------
int ConcentrationRegulatorMethodTransition::excess(void) const
int ConcentrationRegulatorMethodTransition::excess() const
{
int nexcess = count()-targetCount_;
nexcess = max(min(nexcess,maxExchanges_),-maxExchanges_);
@ -362,7 +362,7 @@ const double kMinScale_ = 10000.;
//--------------------------------------------------------
// count
//--------------------------------------------------------
int ConcentrationRegulatorMethodTransition::count(void) const
int ConcentrationRegulatorMethodTransition::count() const
{
// integrate concentration over region
const DENS_MAT & c = (atc_->field(SPECIES_CONCENTRATION)).quantity();

View File

@ -181,7 +181,7 @@ namespace ATC {
//--------------------------------------------------------
// compute_scalar
//--------------------------------------------------------
double ExtrinsicModelManager::compute_scalar(void)
double ExtrinsicModelManager::compute_scalar()
{
double value = 0.;
vector<ExtrinsicModel *>::iterator imodel;
@ -360,7 +360,7 @@ namespace ATC {
//--------------------------------------------------------
// initialize
//--------------------------------------------------------
void ExtrinsicModel::initialize(void)
void ExtrinsicModel::initialize()
{
physicsModel_->initialize();
}

View File

@ -483,7 +483,7 @@ namespace ATC {
//--------------------------------------------------------
// compute_scalar : added energy = - f.x
//--------------------------------------------------------
double ExtrinsicModelElectrostatic::compute_scalar(void)
double ExtrinsicModelElectrostatic::compute_scalar()
{
//((atc_->interscale_manager()).fundamental_atom_quantity(LammpsInterface::ATOM_POSITION))->force_reset();
const DENS_MAT & atomPosition = ((atc_->interscale_manager()).fundamental_atom_quantity(LammpsInterface::ATOM_POSITION))->quantity();

View File

@ -430,7 +430,7 @@ namespace ATC{
//-----------------------------------------------------------------
// write geometry
//-----------------------------------------------------------------
void FE_Engine::write_geometry(void)
void FE_Engine::write_geometry()
{
outputManager_.write_geometry(feMesh_->coordinates(),
feMesh_->connectivity());
@ -2373,7 +2373,7 @@ namespace ATC{
feMesh_->face_shape_function(face, _fN_, _fdN_, _nN_, _fweights_);
feMesh_->element_coordinates(elem, xCoords);
MultAB(xCoords,_fN_,xAtIPs,0,1); //xAtIPs = xCoords*(N.transpose());
MultAB(xCoords,_fN_,xAtIPs,false,true); //xAtIPs = xCoords*(N.transpose());
// interpolate prescribed flux at ips of this element

View File

@ -404,7 +404,7 @@ namespace ATC {
// -------------------------------------------------------------
// initialize
// -------------------------------------------------------------
void FE_Mesh::initialize(void)
void FE_Mesh::initialize()
{
bool aligned = is_aligned();
@ -469,7 +469,7 @@ namespace ATC {
// -------------------------------------------------------------
// test whether almost structured
// -------------------------------------------------------------
bool FE_Mesh::is_aligned(void) const
bool FE_Mesh::is_aligned() const
{
vector<bool> foundBestMatch(nSD_,false);
vector<DENS_VEC> tangents(nSD_);
@ -518,7 +518,7 @@ namespace ATC {
// -------------------------------------------------------------
// element_type
// -------------------------------------------------------------
string FE_Mesh::element_type(void) const {
string FE_Mesh::element_type() const {
int npe = feElement_->num_elt_nodes();
if (npe == 4) { return "TET4"; }
else if (npe == 8) { return "HEX8"; }
@ -1915,7 +1915,7 @@ namespace ATC {
return true;
}
void FE_3DMesh::set_unique_connectivity(void)
void FE_3DMesh::set_unique_connectivity()
{
int numEltNodes = feElement_->num_elt_nodes();
connectivityUnique_.reset(numEltNodes, nElts_);

View File

@ -387,7 +387,7 @@ double LammpsInterface::atom_quantity_conversion(FundamentalAtomQuantity quantit
int LammpsInterface::dimension() const { return lammps_->domain->dimension; }
int LammpsInterface::nregion() const { return lammps_->domain->nregion; }
int LammpsInterface::nregion() const { return lammps_->domain->get_region_list().size(); }
void LammpsInterface::box_bounds(double & boxxlo, double & boxxhi,
double & boxylo, double & boxyhi,
@ -483,7 +483,7 @@ void LammpsInterface::periodicity_correction(double * x) const
}
}
void LammpsInterface::set_reference_box(void) const
void LammpsInterface::set_reference_box() const
{
double * hi = lammps_->domain->boxhi;
double * lo = lammps_->domain->boxlo;
@ -527,14 +527,15 @@ void LammpsInterface::box_periodicity(int & xperiodic,
zperiodic = lammps_->domain->zperiodic;
}
int LammpsInterface::region_id(const char * regionName) const {
int nregion = this->nregion();
for (int iregion = 0; iregion < nregion; iregion++) {
if (strcmp(regionName, region_name(iregion)) == 0) {
int LammpsInterface::region_id(const char *regionName) const {
auto regions = lammps_->domain->get_region_list();
int iregion = 0;
for (auto reg : regions) {
if (strcmp(regionName, reg->id) == 0) {
return iregion;
}
++iregion;
}
throw ATC_Error("Region has not been defined");
return -1;
}
@ -570,7 +571,7 @@ void LammpsInterface::closest_image(const double * const xi, const double * cons
// -----------------------------------------------------------------
// update interface methods
// -----------------------------------------------------------------
LammpsInterface::UnitsType LammpsInterface::units_style(void) const
LammpsInterface::UnitsType LammpsInterface::units_style() const
{
if (strcmp(lammps_->update->unit_style,"lj") == 0) return LJ;
else if (strcmp(lammps_->update->unit_style,"real") == 0) return REAL;
@ -655,7 +656,7 @@ void LammpsInterface::basis_vectors(double **basis) const
}
//* gets the (max) lattice constant
double LammpsInterface::max_lattice_constant(void) const
double LammpsInterface::max_lattice_constant() const
{
double a1[3], a2[3], a3[3];
unit_cell(a1,a2,a3);
@ -666,7 +667,7 @@ double LammpsInterface::max_lattice_constant(void) const
}
//* computes a cutoff distance halfway between 1st and 2nd nearest neighbors
double LammpsInterface::near_neighbor_cutoff(void) const
double LammpsInterface::near_neighbor_cutoff() const
{
double cutoff;
double alat = LammpsInterface::max_lattice_constant();
@ -716,7 +717,7 @@ void LammpsInterface::unit_cell(double *a1, double *a2, double *a3) const
}
//* gets number of atoms in a unit cell
int LammpsInterface::num_atoms_per_cell(void) const
int LammpsInterface::num_atoms_per_cell() const
{
int naCell = 0;
LatticeType type = lattice_style();
@ -733,7 +734,7 @@ int LammpsInterface::num_atoms_per_cell(void) const
}
//* gets tributary volume for an atom
double LammpsInterface::volume_per_atom(void) const
double LammpsInterface::volume_per_atom() const
{
double naCell = num_atoms_per_cell();
double volPerAtom =
@ -1322,61 +1323,73 @@ int** LammpsInterface::bond_list() const { return lammps_->neighbor->bondlist;
char * LammpsInterface::region_name(int iRegion) const
{
return lammps_->domain->regions[iRegion]->id;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->id;
}
char * LammpsInterface::region_style(int iRegion) const
{
return lammps_->domain->regions[iRegion]->style;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->style;
}
double LammpsInterface::region_xlo(int iRegion) const
{
return lammps_->domain->regions[iRegion]->extent_xlo;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->extent_xlo;
}
double LammpsInterface::region_xhi(int iRegion) const
{
return lammps_->domain->regions[iRegion]->extent_xhi;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->extent_xhi;
}
double LammpsInterface::region_ylo(int iRegion) const
{
return lammps_->domain->regions[iRegion]->extent_ylo;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->extent_ylo;
}
double LammpsInterface::region_yhi(int iRegion) const
{
return lammps_->domain->regions[iRegion]->extent_yhi;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->extent_yhi;
}
double LammpsInterface::region_zlo(int iRegion) const
{
return lammps_->domain->regions[iRegion]->extent_zlo;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->extent_zlo;
}
double LammpsInterface::region_zhi(int iRegion) const
{
return lammps_->domain->regions[iRegion]->extent_zhi;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->extent_zhi;
}
double LammpsInterface::region_xscale(int iRegion) const
{
return lammps_->domain->regions[iRegion]->xscale;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->xscale;
}
double LammpsInterface::region_yscale(int iRegion) const
{
return lammps_->domain->regions[iRegion]->yscale;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->yscale;
}
double LammpsInterface::region_zscale(int iRegion) const
{
return lammps_->domain->regions[iRegion]->zscale;
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->zscale;
}
int LammpsInterface::region_match(int iRegion, double x, double y, double z) const {
return lammps_->domain->regions[iRegion]->match(x,y,z);
auto regions = lammps_->domain->get_region_list();
return regions[iRegion]->match(x,y,z);
}
// -----------------------------------------------------------------
@ -1468,7 +1481,7 @@ LAMMPS_NS::Compute * LammpsInterface::const_to_active(COMPUTE_POINTER computePoi
// compute pe/atom interface methods
// - the only compute "owned" by ATC
// -----------------------------------------------------------------
int LammpsInterface::create_compute_pe_peratom(void) const
int LammpsInterface::create_compute_pe_peratom() const
{
char **list = new char*[4];
string atomPeName = compute_pe_name();
@ -1493,7 +1506,7 @@ int LammpsInterface::create_compute_pe_peratom(void) const
return icompute;
}
double * LammpsInterface::compute_pe_peratom(void) const
double * LammpsInterface::compute_pe_peratom() const
{
if (atomPE_) {
atomPE_->compute_peratom();

View File

@ -84,7 +84,7 @@ LinearSolver::LinearSolver(
// --------------------------------------------------------------------
// Setup
// --------------------------------------------------------------------
void LinearSolver::setup(void)
void LinearSolver::setup()
{
tol_ = kTol;
nVariables_ = matrix_.nRows();
@ -113,7 +113,7 @@ void LinearSolver::setup(void)
// --------------------------------------------------------------------
// Allow reinitialization
// --------------------------------------------------------------------
void LinearSolver::allow_reinitialization(void)
void LinearSolver::allow_reinitialization()
{
if (constraintHandlerType_ == PENALIZE_CONSTRAINTS) {
if (matrixModified_ ) throw ATC_Error("LinearSolver: can't allow reinitialization after matrix has been modified");
@ -157,7 +157,7 @@ void LinearSolver::initialize(const BC_SET * bcs)
// --------------------------------------------------------------------
// initialize_matrix
// --------------------------------------------------------------------
void LinearSolver::initialize_matrix(void)
void LinearSolver::initialize_matrix()
{
if ( initializedMatrix_ ) return;
if (constraintHandlerType_ == PENALIZE_CONSTRAINTS) {
@ -172,7 +172,7 @@ void LinearSolver::initialize_matrix(void)
// --------------------------------------------------------------------
// initialize_inverse
// --------------------------------------------------------------------
void LinearSolver::initialize_inverse(void)
void LinearSolver::initialize_inverse()
{
if ( initializedInverse_ ) return;
if (solverType_ == ITERATIVE_SOLVE_SYMMETRIC
@ -196,7 +196,7 @@ void LinearSolver::initialize_inverse(void)
// --------------------------------------------------------------------
// initialize_rhs
// --------------------------------------------------------------------
void LinearSolver::initialize_rhs(void)
void LinearSolver::initialize_rhs()
{
if (! rhs_ ) return;
if (! bcs_ ) {
@ -215,7 +215,7 @@ void LinearSolver::initialize_rhs(void)
// add matrix penalty
// - change matrix for Dirichlet conditions: add penalty
// --------------------------------------------------------------------
void LinearSolver::add_matrix_penalty(void)
void LinearSolver::add_matrix_penalty()
{
penalty_ = kPenalty; // relative to matrix diagonal
SPAR_MAT & A = matrixCopy_;
@ -233,7 +233,7 @@ void LinearSolver::add_matrix_penalty(void)
// partition matrix
// - partition matrix based on Dirichlet constraints
// --------------------------------------------------------------------
void LinearSolver::partition_matrix(void)
void LinearSolver::partition_matrix()
{
fixedSet_.clear();
BC_SET::const_iterator itr;

View File

@ -172,7 +172,7 @@ void OutputManager::read_restart_file(string fileName, RESTART_LIST *data)
//-----------------------------------------------------------------------------
//*
//-----------------------------------------------------------------------------
void OutputManager::write_globals(void)
void OutputManager::write_globals()
{
if ( outputPrefix_ == "NULL") return;
string file = outputPrefix_ + ".GLOBALS";
@ -224,7 +224,7 @@ void OutputManager::write_geometry(const MATRIX *coordinates,
//-----------------------------------------------------------------------------
//*
//-----------------------------------------------------------------------------
void OutputManager::write_geometry_ensight(void)
void OutputManager::write_geometry_ensight()
{
// geometry based on a reference configuration
string geom_file_name = outputPrefix_ + ".geo";
@ -327,7 +327,7 @@ void OutputManager::write_geometry_ensight(void)
//-----------------------------------------------------------------------------
//*
//-----------------------------------------------------------------------------
void OutputManager::write_geometry_text(void)
void OutputManager::write_geometry_text()
{
if ( outputPrefix_ == "NULL") return;
// geometry based on a reference configuration

View File

@ -18,7 +18,7 @@ PairMap::PairMap(LammpsInterface * lammpsInterface, int groupbit ):
nPairs_(0), nBonds_(0)
{
};
PairMap::~PairMap(void)
PairMap::~PairMap()
{
};
//==========================================================
@ -27,7 +27,7 @@ PairMapNeighbor::PairMapNeighbor(LammpsInterface * lammpsInterface, int groupbit
{
};
void PairMapNeighbor::reset(void) const
void PairMapNeighbor::reset() const
{
int inum = lammpsInterface_->neighbor_list_inum();
int *ilist = lammpsInterface_->neighbor_list_ilist();
@ -90,7 +90,7 @@ PairVirialEulerian::PairVirialEulerian(LammpsInterface * lammpsInterface,
};
void PairVirialEulerian::reset(void) const
void PairVirialEulerian::reset() const
{
int nPairs = pairMap_.size();
quantity_.reset(nPairs,nCols_);
@ -129,7 +129,7 @@ PairVirialLagrangian::PairVirialLagrangian(LammpsInterface * lammpsInterface,
};
void PairVirialLagrangian::reset(void) const
void PairVirialLagrangian::reset() const
{
int nPairs = pairMap_.size();
quantity_.reset(nPairs,nCols_);
@ -181,7 +181,7 @@ PairPotentialHeatFluxEulerian::PairPotentialHeatFluxEulerian(LammpsInterface * l
};
void PairPotentialHeatFluxEulerian::reset(void) const
void PairPotentialHeatFluxEulerian::reset() const
{
int nPairs = pairMap_.size();
quantity_.reset(nPairs,nCols_);
@ -217,7 +217,7 @@ PairPotentialHeatFluxLagrangian::PairPotentialHeatFluxLagrangian(LammpsInterface
};
void PairPotentialHeatFluxLagrangian::reset(void) const
void PairPotentialHeatFluxLagrangian::reset() const
{
int nPairs = pairMap_.size();
quantity_.reset(nPairs,nCols_);
@ -275,7 +275,7 @@ BondMatrixKernel::BondMatrixKernel(LammpsInterface * lammpsInterface,
if (kernelFunction_ == nullptr)
throw ATC_Error("No AtC kernel function initialized");
};
void BondMatrixKernel::reset(void) const
void BondMatrixKernel::reset() const
{
int nPairs = pairMap_.size(); // needs to come after quantity for reset
int nNodes = feMesh_->num_nodes_unique();
@ -328,7 +328,7 @@ BondMatrixPartitionOfUnity::BondMatrixPartitionOfUnity(LammpsInterface * lammpsI
lineWg_[i] *= 0.5;
}
};
void BondMatrixPartitionOfUnity::reset(void) const
void BondMatrixPartitionOfUnity::reset() const
{
int nNodes = feMesh_->num_nodes_unique();
int nPairs = pairMap_.size();

View File

@ -108,7 +108,7 @@ void PhysicsModel::parse_material_file(string fileName)
fileId.close();
}
void PhysicsModel::initialize(void)
void PhysicsModel::initialize()
{
// initialize materials
vector< Material* >::const_iterator iter;

View File

@ -111,7 +111,7 @@ PoissonSolver::~PoissonSolver()
// --------------------------------------------------------------------
// Initialize
// --------------------------------------------------------------------
void PoissonSolver::initialize(void)
void PoissonSolver::initialize()
{
nNodes_ = feEngine_->num_nodes();

View File

@ -523,7 +523,7 @@ namespace ATC {
// print
//-------------------------------------------------------------------------
void PrescribedDataManager::print(void)
void PrescribedDataManager::print()
{
// print and check consistency
enum dataType {FREE=0,FIELD,SOURCE};

View File

@ -735,7 +735,7 @@ double fermi_dirac(const double E, const double T)
else
ATC_Error("schrodinger-poisson solver:too many fixed");
}
GlobalSliceSchrodingerPoissonSolver::~GlobalSliceSchrodingerPoissonSolver(void) {
GlobalSliceSchrodingerPoissonSolver::~GlobalSliceSchrodingerPoissonSolver() {
if (solver_) delete solver_;
}
//--------------------------------------------------------------------------

View File

@ -237,7 +237,7 @@ StressCubicElastic::StressCubicElastic(fstream &fileId)
}
}
void StressCubicElastic::set_tangent(void)
void StressCubicElastic::set_tangent()
{
C_.reset(6,6);
C_(0,0)=C_(1,1)=C_(2,2) =c11_;
@ -374,7 +374,7 @@ StressCauchyBorn::~StressCauchyBorn()
//==============================================================================
// initialize
//==============================================================================
void StressCauchyBorn::initialize(void)
void StressCauchyBorn::initialize()
{
if (!initialized_) {
if (makeLinear_) linearize();
@ -393,7 +393,7 @@ void StressCauchyBorn::initialize(void)
//==============================================================================
// compute the bond stiffness consistent with the einstein freq
//==============================================================================
double StressCauchyBorn::stiffness(void) const
double StressCauchyBorn::stiffness() const
{
AtomCluster vac;
cblattice_->atom_cluster(eye<double>(3,3), potential_->cutoff_radius(), vac);

View File

@ -19,7 +19,7 @@ WeakEquationChargeDiffusion::WeakEquationChargeDiffusion()
//--------------------------------------------------------------
// Destructor
//--------------------------------------------------------------
WeakEquationChargeDiffusion::~WeakEquationChargeDiffusion(void)
WeakEquationChargeDiffusion::~WeakEquationChargeDiffusion()
{}
//---------------------------------------------------------------------
// compute capacity

View File

@ -19,7 +19,7 @@ WeakEquationDiffusion::WeakEquationDiffusion()
//--------------------------------------------------------------
// Destructor
//--------------------------------------------------------------
WeakEquationDiffusion::~WeakEquationDiffusion(void)
WeakEquationDiffusion::~WeakEquationDiffusion()
{}
//---------------------------------------------------------------------
// compute capacity

View File

@ -18,7 +18,7 @@ WeakEquationElectronContinuity::WeakEquationElectronContinuity()
//--------------------------------------------------------------
// Destructor
//---------------------------------------------------------------------
WeakEquationElectronContinuity::~WeakEquationElectronContinuity(void)
WeakEquationElectronContinuity::~WeakEquationElectronContinuity()
{}
//---------------------------------------------------------------------
@ -66,7 +66,7 @@ WeakEquationElectronEquilibrium::WeakEquationElectronEquilibrium()
//--------------------------------------------------------------
// Destructor
//---------------------------------------------------------------------
WeakEquationElectronEquilibrium::~WeakEquationElectronEquilibrium(void)
WeakEquationElectronEquilibrium::~WeakEquationElectronEquilibrium()
{}
//---------------------------------------------------------------------

View File

@ -18,7 +18,7 @@ WeakEquationElectronTemperature::WeakEquationElectronTemperature()
//--------------------------------------------------------------
// Destructor
//---------------------------------------------------------------------
WeakEquationElectronTemperature::~WeakEquationElectronTemperature(void)
WeakEquationElectronTemperature::~WeakEquationElectronTemperature()
{}
//---------------------------------------------------------------------
@ -93,7 +93,7 @@ WeakEquationElectronTemperatureJouleHeating::WeakEquationElectronTemperatureJoul
//--------------------------------------------------------------
// Destructor
//---------------------------------------------------------------------
WeakEquationElectronTemperatureJouleHeating::~WeakEquationElectronTemperatureJouleHeating(void)
WeakEquationElectronTemperatureJouleHeating::~WeakEquationElectronTemperatureJouleHeating()
{}
//---------------------------------------------------------------------
@ -162,7 +162,7 @@ WeakEquationElectronTemperatureConvection::WeakEquationElectronTemperatureConvec
//--------------------------------------------------------------
// Destructor
//---------------------------------------------------------------------
WeakEquationElectronTemperatureConvection::~WeakEquationElectronTemperatureConvection(void)
WeakEquationElectronTemperatureConvection::~WeakEquationElectronTemperatureConvection()
{
// do nothing
}

View File

@ -19,7 +19,7 @@ WeakEquationMassDiffusion::WeakEquationMassDiffusion()
//--------------------------------------------------------------
// Destructor
//--------------------------------------------------------------
WeakEquationMassDiffusion::~WeakEquationMassDiffusion(void)
WeakEquationMassDiffusion::~WeakEquationMassDiffusion()
{}
//---------------------------------------------------------------------
// compute capacity

View File

@ -19,7 +19,7 @@ WeakEquationPhononTemperature::WeakEquationPhononTemperature()
//--------------------------------------------------------------
// Destructor
//--------------------------------------------------------------
WeakEquationPhononTemperature::~WeakEquationPhononTemperature(void)
WeakEquationPhononTemperature::~WeakEquationPhononTemperature()
{}
//---------------------------------------------------------------------
// compute total energy
@ -67,7 +67,7 @@ WeakEquationPhononTemperatureExchange::WeakEquationPhononTemperatureExchange()
//--------------------------------------------------------------
// Destructor
//---------------------------------------------------------------------
WeakEquationPhononTemperatureExchange::~WeakEquationPhononTemperatureExchange(void)
WeakEquationPhononTemperatureExchange::~WeakEquationPhononTemperatureExchange()
{}
//---------------------------------------------------------------------

View File

@ -18,7 +18,7 @@ WeakEquationSchrodinger::WeakEquationSchrodinger()
//--------------------------------------------------------------
// Destructor
//---------------------------------------------------------------------
WeakEquationSchrodinger::~WeakEquationSchrodinger(void)
WeakEquationSchrodinger::~WeakEquationSchrodinger()
{}
//---------------------------------------------------------------------

View File

@ -13,14 +13,6 @@ endif
NVCC = nvcc
# obsolete hardware. not supported by current drivers anymore.
#CUDA_ARCH = -arch=sm_13
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
# Fermi hardware
#CUDA_ARCH = -arch=sm_20
#CUDA_ARCH = -arch=sm_21
# Kepler hardware
#CUDA_ARCH = -arch=sm_30
#CUDA_ARCH = -arch=sm_32
@ -45,6 +37,9 @@ CUDA_ARCH = -arch=sm_50
#CUDA_ARCH = -arch=sm_80
#CUDA_ARCH = -arch=sm_86
# Hopper hardware
#CUDA_ARCH = -arch=sm_90
CUDA_CODE = -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] \
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_75,code=[sm_75,compute_75] \

View File

@ -171,7 +171,13 @@ NOTE: when compiling with CMake, all of the considerations listed below
are considered within the CMake configuration process, so no separate
compilation of the gpu library is required. Also this will build in support
for all compute architectures that are supported by the CUDA toolkit version
used to build the gpu library.
used to build the gpu library. A similar setup is possible using
Makefile.linux_multi after adjusting the settings for the CUDA toolkit in use.
Only CUDA toolkit version 8.0 and later and only GPU architecture 3.0
(aka Kepler) and later are supported by this version of LAMMPS. If you want
to use older hardware or software, you have to compile for OpenCL or use an
older version of LAMMPS.
If you do not want to use a fat binary that supports multiple CUDA
architectures, CUDA_ARCH must be set to match the GPU architecture. This
@ -225,7 +231,8 @@ If GERYON_NUMA_FISSION is defined at build time, LAMMPS will consider separate
NUMA nodes on GPUs or accelerators as separate devices. For example, a 2-socket
CPU would appear as two separate devices for OpenCL (and LAMMPS would require
two MPI processes to use both sockets with the GPU library - each with its
own device ID as output by ocl_get_devices).
own device ID as output by ocl_get_devices). OpenCL version 1.2 or later is
required.
For a debug build, use "-DUCL_DEBUG -DGERYON_KERNEL_DUMP" and remove
"-DUCL_NO_EXIT" and "-DMPI_GERYON" from the build options.

View File

@ -379,18 +379,9 @@ UCL_Device::UCL_Device() {
prop.regsPerBlock = hip_prop.regsPerBlock;
prop.clockRate = hip_prop.clockRate;
prop.computeMode = hip_prop.computeMode;
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
//#if CUDA_VERSION >= 2020
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
//#endif
//#if CUDA_VERSION >= 3010
prop.concurrentKernels = hip_prop.concurrentKernels;
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
//#endif
_properties.push_back(prop);
}
@ -447,13 +438,11 @@ void UCL_Device::clear() {
// List all devices along with all properties
void UCL_Device::print_all(std::ostream &out) {
//#if CUDA_VERSION >= 2020
int driver_version;
hipDriverGetVersion(&driver_version);
out << "Driver Version: "
<< driver_version/1000 << "." << driver_version%100
<< std::endl;
//#endif
if (num_devices() == 0)
out << "There is no device supporting HIP\n";
@ -470,12 +459,10 @@ void UCL_Device::print_all(std::ostream &out) {
out << "No\n";
out << " Total amount of global memory: "
<< gigabytes(i) << " GB\n";
//#if CUDA_VERSION >= 2000
out << " Number of compute units/multiprocessors: "
<< _properties[i].multiProcessorCount << std::endl;
out << " Number of cores: "
<< cores(i) << std::endl;
//#endif
out << " Total amount of constant memory: "
<< _properties[i].totalConstantMemory << " bytes\n";
out << " Total amount of local/shared memory per block: "
@ -494,58 +481,29 @@ void UCL_Device::print_all(std::ostream &out) {
<< _properties[i].maxGridSize[0] << " x "
<< _properties[i].maxGridSize[1] << " x "
<< _properties[i].maxGridSize[2] << std::endl;
//out << " Maximum memory pitch: "
// << max_pitch(i) << " bytes\n";
//out << " Texture alignment: "
// << _properties[i].textureAlign << " bytes\n";
out << " Clock rate: "
<< clock_rate(i) << " GHz\n";
//#if CUDA_VERSION >= 2020
//out << " Run time limit on kernels: ";
//if (_properties[i].kernelExecTimeoutEnabled)
// out << "Yes\n";
//else
// out << "No\n";
out << " Integrated: ";
if (_properties[i].integrated)
out << "Yes\n";
else
out << "No\n";
//out << " Support host page-locked memory mapping: ";
//if (_properties[i].canMapHostMemory)
// out << "Yes\n";
//else
// out << "No\n";
out << " Compute mode: ";
if (_properties[i].computeMode == hipComputeModeDefault)
out << "Default\n"; // multiple threads can use device
//#if CUDA_VERSION >= 8000
// else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
//#else
else if (_properties[i].computeMode == hipComputeModeExclusive)
//#endif
out << "Exclusive\n"; // only thread can use device
else if (_properties[i].computeMode == hipComputeModeProhibited)
out << "Prohibited\n"; // no thread can use device
//#if CUDART_VERSION >= 4000
else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
out << "Exclusive Process\n"; // multiple threads 1 process
//#endif
else
out << "Unknown\n";
//#endif
//#if CUDA_VERSION >= 3010
out << " Concurrent kernel execution: ";
if (_properties[i].concurrentKernels)
out << "Yes\n";
else
out << "No\n";
//out << " Device has ECC support enabled: ";
//if (_properties[i].ECCEnabled)
// out << "Yes\n";
//else
// out << "No\n";
//#endif
}
}

View File

@ -5,11 +5,7 @@
#include <cassert>
#include <hip/hip_runtime.h>
//#if CUDA_VERSION >= 3020
#define CUDA_INT_TYPE size_t
//#else
//#define CUDA_INT_TYPE unsigned
//#endif
#ifdef MPI_GERYON
#include "mpi.h"

View File

@ -71,9 +71,6 @@ class UCL_Texture {
/// Make a texture reference available to kernel
inline void allow(UCL_Kernel &) {
//#if CUDA_VERSION < 4000
//CU_SAFE_CALL(cuParamSetTexRef(kernel._kernel, CU_PARAM_TR_DEFAULT, _tex));
//#endif
}
private:

View File

@ -320,6 +320,9 @@ class UCL_Device {
// Grabs the properties for all devices
UCL_Device::UCL_Device() {
#if CUDA_VERSION < 8000
#error CUDA Toolkit version 8 or later required
#endif
CU_SAFE_CALL_NS(cuInit(0));
CU_SAFE_CALL_NS(cuDeviceGetCount(&_num_devices));
for (int i=0; i<_num_devices; ++i) {
@ -358,16 +361,12 @@ UCL_Device::UCL_Device() {
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
#if CUDA_VERSION >= 2020
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
#endif
#if CUDA_VERSION >= 3010
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.concurrentKernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
#endif
_properties.push_back(prop);
}
@ -415,13 +414,10 @@ void UCL_Device::clear() {
// List all devices along with all properties
void UCL_Device::print_all(std::ostream &out) {
#if CUDA_VERSION >= 2020
int driver_version;
cuDriverGetVersion(&driver_version);
out << "CUDA Driver Version: "
<< driver_version/1000 << "." << driver_version%100
<< std::endl;
#endif
// cuDriverGetVersion encodes the version as 1000*major + 10*minor
<< driver_version/1000 << "." << (driver_version%1000)/10 << std::endl;
if (num_devices() == 0)
out << "There is no device supporting CUDA\n";
@ -438,12 +434,10 @@ void UCL_Device::print_all(std::ostream &out) {
out << "No\n";
out << " Total amount of global memory: "
<< gigabytes(i) << " GB\n";
#if CUDA_VERSION >= 2000
out << " Number of compute units/multiprocessors: "
<< _properties[i].multiProcessorCount << std::endl;
out << " Number of cores: "
<< cores(i) << std::endl;
#endif
out << " Total amount of constant memory: "
<< _properties[i].totalConstantMemory << " bytes\n";
out << " Total amount of local/shared memory per block: "
@ -468,7 +462,6 @@ void UCL_Device::print_all(std::ostream &out) {
<< _properties[i].textureAlign << " bytes\n";
out << " Clock rate: "
<< clock_rate(i) << " GHz\n";
#if CUDA_VERSION >= 2020
out << " Run time limit on kernels: ";
if (_properties[i].kernelExecTimeoutEnabled)
out << "Yes\n";
@ -487,22 +480,14 @@ void UCL_Device::print_all(std::ostream &out) {
out << " Compute mode: ";
if (_properties[i].computeMode == CU_COMPUTEMODE_DEFAULT)
out << "Default\n"; // multiple threads can use device
#if CUDA_VERSION >= 8000
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
#else
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE)
#endif
out << "Exclusive\n"; // only thread can use device
else if (_properties[i].computeMode == CU_COMPUTEMODE_PROHIBITED)
out << "Prohibited\n"; // no thread can use device
#if CUDART_VERSION >= 4000
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
out << "Exclusive Process\n"; // multiple threads 1 process
#endif
else
out << "Unknown\n";
#endif
#if CUDA_VERSION >= 3010
out << " Concurrent kernel execution: ";
if (_properties[i].concurrentKernels)
out << "Yes\n";
@ -513,7 +498,6 @@ void UCL_Device::print_all(std::ostream &out) {
out << "Yes\n";
else
out << "No\n";
#endif
}
}

View File

@ -165,17 +165,11 @@ class UCL_Program {
class UCL_Kernel {
public:
UCL_Kernel() : _dimensions(1), _num_args(0) {
#if CUDA_VERSION < 4000
_param_size=0;
#endif
_num_blocks[0]=0;
}
UCL_Kernel(UCL_Program &program, const char *function) :
_dimensions(1), _num_args(0) {
#if CUDA_VERSION < 4000
_param_size=0;
#endif
_num_blocks[0]=0;
set_function(program,function);
_cq=program._cq;
@ -211,11 +205,7 @@ class UCL_Kernel {
if (index==_num_args)
add_arg(arg);
else if (index<_num_args)
#if CUDA_VERSION >= 4000
_kernel_args[index]=arg;
#else
CU_SAFE_CALL(cuParamSetv(_kernel, _offsets[index], arg, sizeof(dtype)));
#endif
else
assert(0==1); // Must add kernel parameters in sequential order
}
@ -242,15 +232,7 @@ class UCL_Kernel {
/// Add a kernel argument.
inline void add_arg(const CUdeviceptr* const arg) {
#if CUDA_VERSION >= 4000
_kernel_args[_num_args]=(void *)arg;
#else
void* ptr = (void*)(size_t)(*arg);
_param_size = (_param_size + __alignof(ptr) - 1) & ~(__alignof(ptr) - 1);
CU_SAFE_CALL(cuParamSetv(_kernel, _param_size, &ptr, sizeof(ptr)));
_offsets.push_back(_param_size);
_param_size+=sizeof(ptr);
#endif
_num_args++;
if (_num_args>UCL_MAX_KERNEL_ARGS) assert(0==1);
}
@ -258,14 +240,7 @@ class UCL_Kernel {
/// Add a kernel argument.
template <class dtype>
inline void add_arg(const dtype* const arg) {
#if CUDA_VERSION >= 4000
_kernel_args[_num_args]=const_cast<dtype *>(arg);
#else
_param_size = (_param_size+__alignof(dtype)-1) & ~(__alignof(dtype)-1);
CU_SAFE_CALL(cuParamSetv(_kernel,_param_size,(void*)arg,sizeof(dtype)));
_offsets.push_back(_param_size);
_param_size+=sizeof(dtype);
#endif
_num_args++;
if (_num_args>UCL_MAX_KERNEL_ARGS) assert(0==1);
}
@ -298,13 +273,9 @@ class UCL_Kernel {
_num_blocks[0]=num_blocks;
_num_blocks[1]=1;
_num_blocks[2]=1;
#if CUDA_VERSION >= 4000
_block_size[0]=block_size;
_block_size[1]=1;
_block_size[2]=1;
#else
CU_SAFE_CALL(cuFuncSetBlockShape(_kernel,block_size,1,1));
#endif
}
/// Set the number of thread blocks and the number of threads in each block
@ -323,13 +294,9 @@ class UCL_Kernel {
_num_blocks[0]=num_blocks_x;
_num_blocks[1]=num_blocks_y;
_num_blocks[2]=1;
#if CUDA_VERSION >= 4000
_block_size[0]=block_size_x;
_block_size[1]=block_size_y;
_block_size[2]=1;
#else
CU_SAFE_CALL(cuFuncSetBlockShape(_kernel,block_size_x,block_size_y,1));
#endif
}
/// Set the number of thread blocks and the number of threads in each block
@ -350,14 +317,9 @@ class UCL_Kernel {
_num_blocks[0]=num_blocks_x;
_num_blocks[1]=num_blocks_y;
_num_blocks[2]=1;
#if CUDA_VERSION >= 4000
_block_size[0]=block_size_x;
_block_size[1]=block_size_y;
_block_size[2]=block_size_z;
#else
CU_SAFE_CALL(cuFuncSetBlockShape(_kernel,block_size_x,block_size_y,
block_size_z));
#endif
}
/// Set the number of thread blocks and the number of threads in each block
@ -373,23 +335,14 @@ class UCL_Kernel {
/// Run the kernel in the default command queue
inline void run() {
#if CUDA_VERSION >= 4000
CU_SAFE_CALL(cuLaunchKernel(_kernel,_num_blocks[0],_num_blocks[1],
_num_blocks[2],_block_size[0],_block_size[1],
_block_size[2],0,_cq,_kernel_args,nullptr));
#else
CU_SAFE_CALL(cuParamSetSize(_kernel,_param_size));
CU_SAFE_CALL(cuLaunchGridAsync(_kernel,_num_blocks[0],_num_blocks[1],_cq));
#endif
}
/// Clear any arguments associated with the kernel
inline void clear_args() {
_num_args=0;
#if CUDA_VERSION < 4000
_offsets.clear();
_param_size=0;
#endif
}
/// Return the default command queue/stream associated with this data
@ -406,13 +359,8 @@ class UCL_Kernel {
unsigned _num_args;
friend class UCL_Texture;
#if CUDA_VERSION >= 4000
unsigned _block_size[3];
void * _kernel_args[UCL_MAX_KERNEL_ARGS];
#else
std::vector<unsigned> _offsets;
unsigned _param_size;
#endif
};
} // namespace

View File

@ -5,11 +5,7 @@
#include <cassert>
#include <cuda.h>
#if CUDA_VERSION >= 3020
#define CUDA_INT_TYPE size_t
#else
#define CUDA_INT_TYPE unsigned
#endif
#ifdef MPI_GERYON
#include "mpi.h"

View File

@ -69,9 +69,6 @@ class UCL_Texture {
/// Make a texture reference available to kernel
inline void allow(UCL_Kernel &kernel) {
#if CUDA_VERSION < 4000
CU_SAFE_CALL(cuParamSetTexRef(kernel._kernel, CU_PARAM_TR_DEFAULT, _tex));
#endif
}
private:

View File

@ -41,7 +41,7 @@ using namespace ucl_cudart;
using namespace ucl_hip;
#endif
int main(int argc, char** argv) {
int main(int /*argc*/, char** /*argv*/) {
UCL_Device cop;
std::cout << "Found " << cop.num_platforms() << " platform(s).\n";
if (cop.num_platforms()>0)

View File

@ -25,21 +25,8 @@
#ifndef UCL_NV_KERNEL_H
#define UCL_NV_KERNEL_H
#if (__CUDA_ARCH__ < 200)
#define mul24 __mul24
#define MEM_THREADS 16
#else
#define mul24(X,Y) (X)*(Y)
#define MEM_THREADS 32
#endif
#ifdef CUDA_PRE_THREE
struct __builtin_align__(16) _double4
{
double x, y, z, w;
};
typedef struct _double4 double4;
#endif
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)

View File

@ -94,13 +94,13 @@ bool AnswerT::init(const int inum, const bool charge, const bool rot,
template <class numtyp, class acctyp>
bool AnswerT::add_fields(const bool charge, const bool rot) {
bool realloc=false;
if (charge && _charge==false) {
if (charge && !_charge) {
_charge=true;
_e_fields++;
_ev_fields++;
realloc=true;
}
if (rot && _rot==false) {
if (rot && !_rot) {
_rot=true;
realloc=true;
}
@ -163,10 +163,8 @@ void AnswerT::copy_answers(const bool eflag, const bool vflag,
#endif
int csize=_ev_fields;
if (!eflag)
csize-=_e_fields;
if (!vflag)
csize-=6;
if (!eflag) csize-=_e_fields;
if (!vflag) csize-=6;
if (csize>0)
engv.update_host(_ev_stride*csize,true);
@ -192,8 +190,7 @@ void AnswerT::copy_answers(const bool eflag, const bool vflag,
template <class numtyp, class acctyp>
double AnswerT::energy_virial(double *eatom, double **vatom,
double *virial) {
if (_eflag==false && _vflag==false)
return 0.0;
if (!_eflag && !_vflag) return 0.0;
double evdwl=0.0;
int vstart=0;
@ -241,11 +238,9 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
template <class numtyp, class acctyp>
double AnswerT::energy_virial(double *eatom, double **vatom,
double *virial, double &ecoul) {
if (_eflag==false && _vflag==false)
return 0.0;
if (!_eflag && !_vflag) return 0.0;
if (_charge==false)
return energy_virial(eatom,vatom,virial);
if (!_charge) return energy_virial(eatom,vatom,virial);
double evdwl=0.0;
int vstart=0, iend=_ev_stride;
@ -305,8 +300,8 @@ void AnswerT::get_answers(double **f, double **tor) {
if (_ilist==nullptr) {
typedef struct { double x,y,z; } vec3d;
typedef struct { acctyp x,y,z,w; } vec4d_t;
vec3d *fp=reinterpret_cast<vec3d*>(&(f[0][0]));
vec4d_t *forcep=reinterpret_cast<vec4d_t*>(&(force[0]));
auto fp=reinterpret_cast<vec3d*>(&(f[0][0]));
auto forcep=reinterpret_cast<vec4d_t*>(&(force[0]));
#if (LAL_USE_OMP == 1)
#pragma omp parallel
@ -329,8 +324,8 @@ void AnswerT::get_answers(double **f, double **tor) {
fp[i].z+=forcep[i].z;
}
if (_rot) {
vec3d *torp=reinterpret_cast<vec3d*>(&(tor[0][0]));
vec4d_t *torquep=reinterpret_cast<vec4d_t*>(&(force[_inum*4]));
auto torp=reinterpret_cast<vec3d*>(&(tor[0][0]));
auto torquep=reinterpret_cast<vec4d_t*>(&(force[_inum*4]));
for (int i=ifrom; i<ito; i++) {
torp[i].x+=torquep[i].x;
torp[i].y+=torquep[i].y;

View File

@ -107,17 +107,17 @@ bool AtomT::alloc(const int nall) {
gpu_bytes+=x_cast.device.row_bytes()+type_cast.device.row_bytes();
#endif
if (_charge && _host_view==false) {
if (_charge && !_host_view) {
success=success && (q.alloc(_max_atoms,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=q.device.row_bytes();
}
if (_rot && _host_view==false) {
if (_rot && !_host_view) {
success=success && (quat.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=quat.device.row_bytes();
}
if (_vel && _host_view==false) {
if (_vel && !_host_view) {
success=success && (v.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=v.device.row_bytes();
@ -161,37 +161,37 @@ bool AtomT::add_fields(const bool charge, const bool rot,
// Ignore host/device transfers?
int gpu_bytes=0;
if (charge && _charge==false) {
if (charge && !_charge) {
_charge=true;
_other=true;
if (_host_view==false) {
if (!_host_view) {
success=success && (q.alloc(_max_atoms,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=q.device.row_bytes();
}
}
if (rot && _rot==false) {
if (rot && !_rot) {
_rot=true;
_other=true;
if (_host_view==false) {
if (!_host_view) {
success=success && (quat.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=quat.device.row_bytes();
}
}
if (vel && _vel==false) {
if (vel && !_vel) {
_vel=true;
_other=true;
if (_host_view==false) {
if (!_host_view) {
success=success && (v.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=v.device.row_bytes();
}
}
if (bonds && _bonds==false) {
if (bonds && !_bonds) {
_bonds=true;
if (_bonds && _gpu_nbor>0) {
success=success && (dev_tag.alloc(_max_atoms,*dev,

View File

@ -101,7 +101,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
// Get the names of all nodes
int name_length;
char node_name[MPI_MAX_PROCESSOR_NAME];
char *node_names = new char[MPI_MAX_PROCESSOR_NAME*_world_size];
auto node_names = new char[MPI_MAX_PROCESSOR_NAME*_world_size];
MPI_Get_processor_name(node_name,&name_length);
MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names[0],
MPI_MAX_PROCESSOR_NAME,MPI_CHAR,_comm_world);
@ -201,9 +201,9 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
unsigned best_cus = gpu->cus(0);
bool type_match = (gpu->device_type(0) == type);
for (int i = 1; i < gpu->num_devices(); i++) {
if (type_match==true && gpu->device_type(i)!=type)
if (type_match && gpu->device_type(i)!=type)
continue;
if (type_match == false && gpu->device_type(i) == type) {
// No device of the requested type selected so far: adopt the first one that matches.
if (!type_match && gpu->device_type(i) == type) {
type_match = true;
best_cus = gpu->cus(i);
best_device = i;
@ -280,7 +280,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
MPI_Comm_rank(_comm_gpu,&_gpu_rank);
#if !defined(CUDA_PROXY) && !defined(CUDA_MPS_SUPPORT)
if (_procs_per_gpu>1 && gpu->sharing_supported(my_gpu)==false)
if (_procs_per_gpu>1 && !gpu->sharing_supported(my_gpu))
return -7;
#endif
@ -400,7 +400,7 @@ int DeviceT::set_ocl_params(std::string s_config, const std::string &extra_args)
_ocl_compile_string += " -DCONFIG_ID="+params[0]+
" -DSIMD_SIZE="+params[1]+
" -DMEM_THREADS="+params[2];
if (gpu->has_shuffle_support()==false)
if (!gpu->has_shuffle_support())
_ocl_compile_string+=" -DSHUFFLE_AVAIL=0";
else
_ocl_compile_string+=" -DSHUFFLE_AVAIL="+params[3];
@ -443,7 +443,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
const bool vel) {
if (!_device_init)
return -1;
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
if (sizeof(acctyp)==sizeof(double) && !gpu->double_precision())
return -5;
// Counts of data transfers for timing overhead estimates
@ -480,11 +480,11 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
if (vel)
_data_in_estimate++;
} else {
if (atom.charge()==false && charge)
if (!atom.charge() && charge)
_data_in_estimate++;
if (atom.quaternion()==false && rot)
if (!atom.quaternion() && rot)
_data_in_estimate++;
if (atom.velocity()==false && vel)
if (!atom.velocity() && vel)
_data_in_estimate++;
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel))
return -3;
@ -502,7 +502,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
const int nall) {
if (!_device_init)
return -1;
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
if (sizeof(acctyp)==sizeof(double) && !gpu->double_precision())
return -5;
if (_init_count==0) {

View File

@ -113,7 +113,7 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
if (!success)
return false;
if (_use_packing==false) {
if (!_use_packing) {
#ifndef LAL_USE_OLD_NEIGHBOR
_shared->compile_kernels(devi, gpu_nbor, compile_flags+
" -DMAX_SUBGROUPS_PER_BLOCK="+toa(_block_nbor_build/_simd_size));
@ -153,7 +153,7 @@ void Neighbor::alloc(bool &success) {
int nt=_max_atoms+_max_host;
if (_max_nbors)
_max_nbors = ((_max_nbors-1)/_threads_per_atom+1)*_threads_per_atom;
if (_use_packing==false || _gpu_nbor>0) {
if (!_use_packing || _gpu_nbor>0) {
if (_max_nbors)
success=success &&
(dev_nbor.alloc((_max_nbors+2)*_max_atoms,*dev)==UCL_SUCCESS);
@ -166,7 +166,7 @@ void Neighbor::alloc(bool &success) {
_c_bytes=dev_nbor.row_bytes();
if (_alloc_packed) {
if (_use_packing==false) {
if (!_use_packing) {
dev_packed_begin.clear();
success=success && (dev_packed_begin.alloc(_max_atoms,*dev,
_packed_permissions)==UCL_SUCCESS);
@ -373,7 +373,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
time_nbor.stop();
if (_use_packing==false) {
if (!_use_packing) {
time_kernel.start();
int GX=static_cast<int>(ceil(static_cast<double>(inum)*_threads_per_atom/
block_size));
@ -450,7 +450,7 @@ void Neighbor::get_host3(const int inum, const int nlist, int *ilist, int *numj,
}
time_nbor.stop();
if (_use_packing==false) {
if (!_use_packing) {
time_kernel.start();
int GX=static_cast<int>(ceil(static_cast<double>(inum)*_threads_per_atom/
block_size));
@ -564,7 +564,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
#endif
}
const numtyp cutoff_cast=static_cast<numtyp>(_cutoff);
const auto cutoff_cast=static_cast<numtyp>(_cutoff);
if (_maxspecial>0) {
time_nbor.start();
@ -741,12 +741,12 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
// If binning on GPU, do this now
if (_gpu_nbor==1) {
mn = _max_nbors;
const numtyp i_cell_size=static_cast<numtyp>(1.0/_cell_size);
const auto i_cell_size=static_cast<numtyp>(1.0/_cell_size);
const int neigh_block=_block_cell_id;
const int GX=(int)ceil((double)nall/neigh_block);
const numtyp sublo0=static_cast<numtyp>(sublo[0]);
const numtyp sublo1=static_cast<numtyp>(sublo[1]);
const numtyp sublo2=static_cast<numtyp>(sublo[2]);
const auto sublo0=static_cast<numtyp>(sublo[0]);
const auto sublo1=static_cast<numtyp>(sublo[1]);
const auto sublo2=static_cast<numtyp>(sublo[2]);
_shared->k_cell_id.set_size(GX,neigh_block);
_shared->k_cell_id.run(&atom.x, &atom.dev_cell_id,
&atom.dev_particle_id, &sublo0, &sublo1,

View File

@ -303,7 +303,7 @@ int PPPMT::spread(const int ago, const int nlocal, const int nall,
double *host_q, double *boxlo,
const double delxinv, const double delyinv,
const double delzinv) {
if (_precompute_done==false) {
if (!_precompute_done) {
atom->acc_timers();
_precompute(ago,nlocal,nall,host_x,host_type,success,host_q,boxlo,delxinv,
delyinv,delzinv);
@ -359,7 +359,7 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
time_interp.stop();
ans->copy_answers(false,false,false,false,0);
if (_kspace_split==false)
if (!_kspace_split)
device->add_ans_object(ans);
}

View File

@ -101,7 +101,7 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
float *b=pppm_gpu_init(PPPMF,nlocal,nall,screen,order,nxlo_out,nylo_out,
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
if (split==false && respa==false)
if (!split && !respa)
PPPMF.device->set_single_precompute(&PPPMF);
return b;
}
@ -146,7 +146,7 @@ double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
split,success);
if (split==false && respa==false)
if (!split && !respa)
PPPMD.device->set_double_precompute(&PPPMD);
return b;
}

View File

@ -58,49 +58,6 @@
#define MAX_BIO_SHARED_TYPES 128
#define PPPM_MAX_SPLINE 8
// -------------------------------------------------------------------------
// LEGACY DEVICE CONFIGURATION
// -------------------------------------------------------------------------
#ifdef __CUDA_ARCH__
#if (__CUDA_ARCH__ < 200)
#undef CONFIG_ID
#define CONFIG_ID 101
#define MEM_THREADS 16
#undef THREADS_PER_ATOM
#define THREADS_PER_ATOM 1
#undef THREADS_PER_CHARGE
#define THREADS_PER_CHARGE 16
#undef BLOCK_PAIR
#define BLOCK_PAIR 64
#undef BLOCK_BIO_PAIR
#define BLOCK_BIO_PAIR 64
#undef BLOCK_NBOR_BUILD
#define BLOCK_NBOR_BUILD 64
#undef MAX_SHARED_TYPES
#define MAX_SHARED_TYPES 8
#undef SHUFFLE_AVAIL
#define SHUFFLE_AVAIL 0
#elseif (__CUDA_ARCH__ < 300)
#undef CONFIG_ID
#define CONFIG_ID 102
#undef BLOCK_PAIR
#define BLOCK_PAIR 128
#undef BLOCK_BIO_PAIR
#define BLOCK_BIO_PAIR 128
#undef MAX_SHARED_TYPES
#define MAX_SHARED_TYPES 8
#undef SHUFFLE_AVAIL
#define SHUFFLE_AVAIL 0
#endif
#endif
// -------------------------------------------------------------------------
// KERNEL MACROS
// -------------------------------------------------------------------------
@ -111,12 +68,6 @@
#define fast_mul(X,Y) (X)*(Y)
#ifdef __CUDA_ARCH__
#if (__CUDA_ARCH__ < 200)
#define fast_mul __mul24
#endif
#endif
#define EVFLAG 1
#define NOUNROLL
#define GLOBAL_ID_X threadIdx.x+fast_mul(blockIdx.x,blockDim.x)
@ -220,14 +171,6 @@
// KERNEL MACROS - MATH
// -------------------------------------------------------------------------
#ifdef CUDA_PRE_THREE
struct __builtin_align__(16) _double4
{
double x, y, z, w;
};
typedef struct _double4 double4;
#endif
#ifdef _DOUBLE_DOUBLE
#define ucl_exp exp

View File

@ -69,7 +69,7 @@ int YukawaColloidT::init(const int ntypes,
_max_rad_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
if (_shared_view==false)
if (!_shared_view)
c_rad.alloc(_max_rad_size,*(this->ucl_device),UCL_WRITE_ONLY,UCL_READ_ONLY);
rad_tex.get_texture(*(this->pair_program),"rad_tex");
@ -157,7 +157,7 @@ void YukawaColloidT::compute(const int f_ago, const int inum_full,
if (nall>_max_rad_size) {
_max_rad_size=static_cast<int>(static_cast<double>(nall)*1.10);
if (_shared_view==false) {
if (!_shared_view) {
c_rad.resize(_max_rad_size);
rad_tex.bind_float(c_rad,1);
}
@ -229,7 +229,7 @@ int** YukawaColloidT::compute(const int ago, const int inum_full,
if (nall>_max_rad_size) {
_max_rad_size=static_cast<int>(static_cast<double>(nall)*1.10);
if (_shared_view==false) {
if (!_shared_view) {
c_rad.resize(_max_rad_size);
rad_tex.bind_float(c_rad,1);
}

View File

@ -34,12 +34,12 @@ make lib-meam args="-m ifort" # build MEAM lib with custom Makefile.ifort (usi
# settings
version = "1.3.0"
version = "1.3.2"
url = "https://github.com/MolSSI-MDI/MDI_Library/archive/v%s.tar.gz" % version
# known checksums for different MDI versions. used to validate the download.
checksums = { \
'1.3.0' : '8a8da217148bd9b700083b67d795af5e', \
'1.3.2' : '836f5da400d8cff0f0e4435640f9454f', \
}
# print error message or help