Merge remote-tracking branch 'lammps/develop' into electrode
This commit is contained in:
@ -775,7 +775,7 @@ namespace ATC {
|
||||
|
||||
//--------------------------------------------------
|
||||
/** allow FE_Engine to construct data manager after mesh is constructed */
|
||||
void ATC_Coupling::construct_prescribed_data_manager (void) {
|
||||
void ATC_Coupling::construct_prescribed_data_manager () {
|
||||
prescribedDataMgr_ = new PrescribedDataManager(feEngine_,fieldSizes_);
|
||||
}
|
||||
|
||||
@ -1704,7 +1704,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------------
|
||||
/** method to trigger construction of mesh data after mesh construction */
|
||||
//--------------------------------------------------------------
|
||||
void ATC_Coupling::initialize_mesh_data(void)
|
||||
void ATC_Coupling::initialize_mesh_data()
|
||||
{
|
||||
int nelts = feEngine_->fe_mesh()->num_elements();
|
||||
elementToMaterialMap_.reset(nelts);
|
||||
@ -1715,7 +1715,7 @@ namespace ATC {
|
||||
}
|
||||
//--------------------------------------------------------
|
||||
|
||||
void ATC_Coupling::reset_flux_mask(void)
|
||||
void ATC_Coupling::reset_flux_mask()
|
||||
{
|
||||
int i;
|
||||
// this is exact only for uniform meshes and certain types of atomic weights
|
||||
|
||||
@ -357,7 +357,7 @@ namespace ATC {
|
||||
// compute_scalar : added energy
|
||||
// this is used in the line search
|
||||
//--------------------------------------------------------------------
|
||||
double ATC_CouplingMomentum::compute_scalar(void)
|
||||
double ATC_CouplingMomentum::compute_scalar()
|
||||
{
|
||||
double energy = extrinsicModelManager_.compute_scalar();
|
||||
return energy;
|
||||
|
||||
@ -327,7 +327,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------------------
|
||||
// compute_scalar : added energy
|
||||
//--------------------------------------------------------------------
|
||||
double ATC_CouplingMomentumEnergy::compute_scalar(void)
|
||||
double ATC_CouplingMomentumEnergy::compute_scalar()
|
||||
{
|
||||
double energy = 0.0;
|
||||
energy += extrinsicModelManager_.compute_scalar();
|
||||
@ -337,7 +337,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------------------
|
||||
// total kinetic energy
|
||||
//--------------------------------------------------------------------
|
||||
double ATC_CouplingMomentumEnergy::kinetic_energy(void)
|
||||
double ATC_CouplingMomentumEnergy::kinetic_energy()
|
||||
{
|
||||
const MATRIX & M = massMats_[VELOCITY].quantity();
|
||||
|
||||
@ -355,7 +355,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------------------
|
||||
// total potential energy
|
||||
//--------------------------------------------------------------------
|
||||
double ATC_CouplingMomentumEnergy::potential_energy(void)
|
||||
double ATC_CouplingMomentumEnergy::potential_energy()
|
||||
{
|
||||
Array<FieldName> mask(1);
|
||||
mask(0) = VELOCITY;
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
#ifndef ATC_ERROR
|
||||
#define ATC_ERROR
|
||||
|
||||
#include <exception>
|
||||
#include <string>
|
||||
|
||||
// the following two convert __LINE__ to a string
|
||||
@ -23,7 +24,7 @@ namespace ATC {
|
||||
* @brief Base class for throwing run-time errors with descriptions
|
||||
*/
|
||||
|
||||
class ATC_Error {
|
||||
class ATC_Error : public std::exception {
|
||||
|
||||
public:
|
||||
// constructor
|
||||
@ -31,17 +32,21 @@ class ATC_Error {
|
||||
{
|
||||
errorDescription_ = "ERROR: " + errorDescription;
|
||||
ERROR_FOR_BACKTRACE
|
||||
};
|
||||
}
|
||||
|
||||
ATC_Error(std::string location, std::string errorDescription)
|
||||
{
|
||||
errorDescription_ = "ERROR: " + location + ": "+ errorDescription;
|
||||
ERROR_FOR_BACKTRACE
|
||||
};
|
||||
}
|
||||
|
||||
std::string error_description() {
|
||||
return errorDescription_;
|
||||
};
|
||||
}
|
||||
|
||||
const char *what() const noexcept override {
|
||||
return errorDescription_.c_str();
|
||||
}
|
||||
|
||||
private:
|
||||
// string describing the type of error
|
||||
|
||||
@ -1674,7 +1674,7 @@ pecified
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
void ATC_Method::set_reference_potential_energy(void)
|
||||
void ATC_Method::set_reference_potential_energy()
|
||||
{
|
||||
if (setRefPE_) {
|
||||
if (setRefPEvalue_) {
|
||||
@ -2170,7 +2170,7 @@ pecified
|
||||
// }
|
||||
}
|
||||
//--------------------------------------------------------
|
||||
void ATC_Method::compute_nodeset_output(void)
|
||||
void ATC_Method::compute_nodeset_output()
|
||||
{
|
||||
map< pair <string, FieldName>, NodesetOperationType >::const_iterator iter;
|
||||
for (iter = nsetData_.begin(); iter != nsetData_.end();iter++){
|
||||
@ -2194,7 +2194,7 @@ pecified
|
||||
}
|
||||
}
|
||||
//--------------------------------------------------------
|
||||
void ATC_Method::compute_faceset_output(void)
|
||||
void ATC_Method::compute_faceset_output()
|
||||
{
|
||||
map < pair<string,string>, FacesetIntegralType >::const_iterator iter;
|
||||
DENS_MAT values;
|
||||
@ -2223,7 +2223,7 @@ pecified
|
||||
}
|
||||
}
|
||||
//--------------------------------------------------------
|
||||
void ATC_Method::compute_elementset_output(void)
|
||||
void ATC_Method::compute_elementset_output()
|
||||
{
|
||||
map< pair <string, FieldName>, ElementsetOperationType >::const_iterator iter;
|
||||
for (iter = esetData_.begin(); iter != esetData_.end();iter++){
|
||||
@ -2379,7 +2379,7 @@ pecified
|
||||
}
|
||||
|
||||
//--------------------------------------------------------
|
||||
void ATC_Method::remap_ghost_ref_positions(void)
|
||||
void ATC_Method::remap_ghost_ref_positions()
|
||||
{
|
||||
|
||||
int nlocal = lammpsInterface_->nlocal();
|
||||
|
||||
@ -916,12 +916,12 @@ namespace ATC {
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
void ATC_Transfer::compute_bond_matrix(void)
|
||||
void ATC_Transfer::compute_bond_matrix()
|
||||
{
|
||||
bondMatrix_->reset();
|
||||
}
|
||||
//-------------------------------------------------------------------
|
||||
void ATC_Transfer::compute_fields(void)
|
||||
void ATC_Transfer::compute_fields()
|
||||
{
|
||||
|
||||
// keep per-atom computes fresh. JAZ and REJ not sure why;
|
||||
|
||||
@ -83,7 +83,7 @@ using ATC_Utility::to_string;
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
void ATC_TransferKernel::compute_kernel_matrix_molecule(void) // KKM add
|
||||
void ATC_TransferKernel::compute_kernel_matrix_molecule() // KKM add
|
||||
{
|
||||
int nLocalMol = smallMoleculeSet_->local_molecule_count();
|
||||
if (nLocal_>0) {
|
||||
|
||||
@ -688,7 +688,7 @@ namespace ATC {
|
||||
// compute_sparsity
|
||||
// - creates sparsity template
|
||||
//--------------------------------------------------------
|
||||
void RegulatorShapeFunction::compute_sparsity(void)
|
||||
void RegulatorShapeFunction::compute_sparsity()
|
||||
{
|
||||
|
||||
// first get local pattern from N N^T
|
||||
|
||||
@ -198,7 +198,7 @@ namespace ATC {
|
||||
|
||||
|
||||
// nomenclature might be a bit backward: control --> nodes that exert the control, & influence --> atoms that feel the influence
|
||||
void ChargeRegulatorMethod::initialize(void)
|
||||
void ChargeRegulatorMethod::initialize()
|
||||
{
|
||||
interscaleManager_ = &(atc_->interscale_manager());
|
||||
|
||||
@ -220,7 +220,7 @@ namespace ATC {
|
||||
|
||||
int ChargeRegulatorMethod::nlocal() { return atc_->nlocal(); }
|
||||
|
||||
void ChargeRegulatorMethod::set_greens_functions(void)
|
||||
void ChargeRegulatorMethod::set_greens_functions()
|
||||
{
|
||||
// set up Green's function per node
|
||||
for (int i = 0; i < nNodes_; i++) {
|
||||
@ -272,7 +272,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// Initialize
|
||||
//--------------------------------------------------------
|
||||
void ChargeRegulatorMethodFeedback::initialize(void)
|
||||
void ChargeRegulatorMethodFeedback::initialize()
|
||||
{
|
||||
ChargeRegulatorMethod::initialize();
|
||||
if (surfaceType_ != ChargeRegulator::CONDUCTOR)
|
||||
@ -284,7 +284,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// construct_transfers
|
||||
//--------------------------------------------------------
|
||||
void ChargeRegulatorMethodFeedback::construct_transfers(void)
|
||||
void ChargeRegulatorMethodFeedback::construct_transfers()
|
||||
{
|
||||
ChargeRegulatorMethod::construct_transfers();
|
||||
|
||||
@ -301,7 +301,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// find measurement atoms and nodes
|
||||
//--------------------------------------------------------
|
||||
void ChargeRegulatorMethodFeedback::set_influence(void)
|
||||
void ChargeRegulatorMethodFeedback::set_influence()
|
||||
{
|
||||
|
||||
// get nodes that overlap influence atoms & compact list of influence atoms
|
||||
@ -321,7 +321,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// construct a Green's submatrix
|
||||
//--------------------------------------------------------
|
||||
void ChargeRegulatorMethodFeedback::set_influence_matrix(void)
|
||||
void ChargeRegulatorMethodFeedback::set_influence_matrix()
|
||||
{
|
||||
// construct control-influence matrix bar{G}^-1: ds{p} = G{p,m}^-1 dphi{m}
|
||||
|
||||
@ -434,7 +434,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// Initialize
|
||||
//--------------------------------------------------------
|
||||
void ChargeRegulatorMethodImageCharge::initialize(void)
|
||||
void ChargeRegulatorMethodImageCharge::initialize()
|
||||
{
|
||||
ChargeRegulatorMethod::initialize();
|
||||
if (surfaceType_ != ChargeRegulator::DIELECTRIC) throw ATC_Error("currently image charge can only mimic a dielectric");
|
||||
|
||||
@ -224,7 +224,7 @@ const double kMinScale_ = 10000.;
|
||||
//--------------------------------------------------------
|
||||
// Initialize
|
||||
//--------------------------------------------------------
|
||||
void ConcentrationRegulatorMethodTransition::initialize(void)
|
||||
void ConcentrationRegulatorMethodTransition::initialize()
|
||||
{
|
||||
#ifdef ATC_VERBOSE
|
||||
lammpsInterface_->print_msg_once(
|
||||
@ -289,7 +289,7 @@ const double kMinScale_ = 10000.;
|
||||
//--------------------------------------------------------
|
||||
// pre exchange
|
||||
//--------------------------------------------------------
|
||||
void ConcentrationRegulatorMethodTransition::pre_exchange(void)
|
||||
void ConcentrationRegulatorMethodTransition::pre_exchange()
|
||||
{
|
||||
// return if should not be called on this timestep
|
||||
if ( ! lammpsInterface_->now(frequency_)) return;
|
||||
@ -312,7 +312,7 @@ const double kMinScale_ = 10000.;
|
||||
//--------------------------------------------------------
|
||||
// pre force
|
||||
//--------------------------------------------------------
|
||||
void ConcentrationRegulatorMethodTransition::pre_force(void)
|
||||
void ConcentrationRegulatorMethodTransition::pre_force()
|
||||
{
|
||||
transition();
|
||||
}
|
||||
@ -353,7 +353,7 @@ const double kMinScale_ = 10000.;
|
||||
//--------------------------------------------------------
|
||||
// excess
|
||||
//--------------------------------------------------------
|
||||
int ConcentrationRegulatorMethodTransition::excess(void) const
|
||||
int ConcentrationRegulatorMethodTransition::excess() const
|
||||
{
|
||||
int nexcess = count()-targetCount_;
|
||||
nexcess = max(min(nexcess,maxExchanges_),-maxExchanges_);
|
||||
@ -362,7 +362,7 @@ const double kMinScale_ = 10000.;
|
||||
//--------------------------------------------------------
|
||||
// count
|
||||
//--------------------------------------------------------
|
||||
int ConcentrationRegulatorMethodTransition::count(void) const
|
||||
int ConcentrationRegulatorMethodTransition::count() const
|
||||
{
|
||||
// integrate concentration over region
|
||||
const DENS_MAT & c = (atc_->field(SPECIES_CONCENTRATION)).quantity();
|
||||
|
||||
@ -181,7 +181,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// compute_scalar
|
||||
//--------------------------------------------------------
|
||||
double ExtrinsicModelManager::compute_scalar(void)
|
||||
double ExtrinsicModelManager::compute_scalar()
|
||||
{
|
||||
double value = 0.;
|
||||
vector<ExtrinsicModel *>::iterator imodel;
|
||||
@ -360,7 +360,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// initialize
|
||||
//--------------------------------------------------------
|
||||
void ExtrinsicModel::initialize(void)
|
||||
void ExtrinsicModel::initialize()
|
||||
{
|
||||
physicsModel_->initialize();
|
||||
}
|
||||
|
||||
@ -483,7 +483,7 @@ namespace ATC {
|
||||
//--------------------------------------------------------
|
||||
// compute_scalar : added energy = - f.x
|
||||
//--------------------------------------------------------
|
||||
double ExtrinsicModelElectrostatic::compute_scalar(void)
|
||||
double ExtrinsicModelElectrostatic::compute_scalar()
|
||||
{
|
||||
//((atc_->interscale_manager()).fundamental_atom_quantity(LammpsInterface::ATOM_POSITION))->force_reset();
|
||||
const DENS_MAT & atomPosition = ((atc_->interscale_manager()).fundamental_atom_quantity(LammpsInterface::ATOM_POSITION))->quantity();
|
||||
|
||||
@ -430,7 +430,7 @@ namespace ATC{
|
||||
//-----------------------------------------------------------------
|
||||
// write geometry
|
||||
//-----------------------------------------------------------------
|
||||
void FE_Engine::write_geometry(void)
|
||||
void FE_Engine::write_geometry()
|
||||
{
|
||||
outputManager_.write_geometry(feMesh_->coordinates(),
|
||||
feMesh_->connectivity());
|
||||
@ -2373,7 +2373,7 @@ namespace ATC{
|
||||
feMesh_->face_shape_function(face, _fN_, _fdN_, _nN_, _fweights_);
|
||||
feMesh_->element_coordinates(elem, xCoords);
|
||||
|
||||
MultAB(xCoords,_fN_,xAtIPs,0,1); //xAtIPs = xCoords*(N.transpose());
|
||||
MultAB(xCoords,_fN_,xAtIPs,false,true); //xAtIPs = xCoords*(N.transpose());
|
||||
|
||||
// interpolate prescribed flux at ips of this element
|
||||
|
||||
|
||||
@ -404,7 +404,7 @@ namespace ATC {
|
||||
// -------------------------------------------------------------
|
||||
// initialize
|
||||
// -------------------------------------------------------------
|
||||
void FE_Mesh::initialize(void)
|
||||
void FE_Mesh::initialize()
|
||||
{
|
||||
|
||||
bool aligned = is_aligned();
|
||||
@ -469,7 +469,7 @@ namespace ATC {
|
||||
// -------------------------------------------------------------
|
||||
// test whether almost structured
|
||||
// -------------------------------------------------------------
|
||||
bool FE_Mesh::is_aligned(void) const
|
||||
bool FE_Mesh::is_aligned() const
|
||||
{
|
||||
vector<bool> foundBestMatch(nSD_,false);
|
||||
vector<DENS_VEC> tangents(nSD_);
|
||||
@ -518,7 +518,7 @@ namespace ATC {
|
||||
// -------------------------------------------------------------
|
||||
// element_type
|
||||
// -------------------------------------------------------------
|
||||
string FE_Mesh::element_type(void) const {
|
||||
string FE_Mesh::element_type() const {
|
||||
int npe = feElement_->num_elt_nodes();
|
||||
if (npe == 4) { return "TET4"; }
|
||||
else if (npe == 8) { return "HEX8"; }
|
||||
@ -1915,7 +1915,7 @@ namespace ATC {
|
||||
return true;
|
||||
}
|
||||
|
||||
void FE_3DMesh::set_unique_connectivity(void)
|
||||
void FE_3DMesh::set_unique_connectivity()
|
||||
{
|
||||
int numEltNodes = feElement_->num_elt_nodes();
|
||||
connectivityUnique_.reset(numEltNodes, nElts_);
|
||||
|
||||
@ -387,7 +387,7 @@ double LammpsInterface::atom_quantity_conversion(FundamentalAtomQuantity quantit
|
||||
|
||||
int LammpsInterface::dimension() const { return lammps_->domain->dimension; }
|
||||
|
||||
int LammpsInterface::nregion() const { return lammps_->domain->nregion; }
|
||||
int LammpsInterface::nregion() const { return lammps_->domain->get_region_list().size(); }
|
||||
|
||||
void LammpsInterface::box_bounds(double & boxxlo, double & boxxhi,
|
||||
double & boxylo, double & boxyhi,
|
||||
@ -483,7 +483,7 @@ void LammpsInterface::periodicity_correction(double * x) const
|
||||
}
|
||||
}
|
||||
|
||||
void LammpsInterface::set_reference_box(void) const
|
||||
void LammpsInterface::set_reference_box() const
|
||||
{
|
||||
double * hi = lammps_->domain->boxhi;
|
||||
double * lo = lammps_->domain->boxlo;
|
||||
@ -527,14 +527,15 @@ void LammpsInterface::box_periodicity(int & xperiodic,
|
||||
zperiodic = lammps_->domain->zperiodic;
|
||||
}
|
||||
|
||||
int LammpsInterface::region_id(const char * regionName) const {
|
||||
int nregion = this->nregion();
|
||||
for (int iregion = 0; iregion < nregion; iregion++) {
|
||||
if (strcmp(regionName, region_name(iregion)) == 0) {
|
||||
int LammpsInterface::region_id(const char *regionName) const {
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
int iregion = 0;
|
||||
for (auto reg : regions) {
|
||||
if (strcmp(regionName, reg->id) == 0) {
|
||||
return iregion;
|
||||
}
|
||||
++iregion;
|
||||
}
|
||||
throw ATC_Error("Region has not been defined");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -570,7 +571,7 @@ void LammpsInterface::closest_image(const double * const xi, const double * cons
|
||||
// -----------------------------------------------------------------
|
||||
// update interface methods
|
||||
// -----------------------------------------------------------------
|
||||
LammpsInterface::UnitsType LammpsInterface::units_style(void) const
|
||||
LammpsInterface::UnitsType LammpsInterface::units_style() const
|
||||
{
|
||||
if (strcmp(lammps_->update->unit_style,"lj") == 0) return LJ;
|
||||
else if (strcmp(lammps_->update->unit_style,"real") == 0) return REAL;
|
||||
@ -655,7 +656,7 @@ void LammpsInterface::basis_vectors(double **basis) const
|
||||
}
|
||||
|
||||
//* gets the (max) lattice constant
|
||||
double LammpsInterface::max_lattice_constant(void) const
|
||||
double LammpsInterface::max_lattice_constant() const
|
||||
{
|
||||
double a1[3], a2[3], a3[3];
|
||||
unit_cell(a1,a2,a3);
|
||||
@ -666,7 +667,7 @@ double LammpsInterface::max_lattice_constant(void) const
|
||||
}
|
||||
|
||||
//* computes a cutoff distance halfway between 1st and 2nd nearest neighbors
|
||||
double LammpsInterface::near_neighbor_cutoff(void) const
|
||||
double LammpsInterface::near_neighbor_cutoff() const
|
||||
{
|
||||
double cutoff;
|
||||
double alat = LammpsInterface::max_lattice_constant();
|
||||
@ -716,7 +717,7 @@ void LammpsInterface::unit_cell(double *a1, double *a2, double *a3) const
|
||||
}
|
||||
|
||||
//* gets number of atoms in a unit cell
|
||||
int LammpsInterface::num_atoms_per_cell(void) const
|
||||
int LammpsInterface::num_atoms_per_cell() const
|
||||
{
|
||||
int naCell = 0;
|
||||
LatticeType type = lattice_style();
|
||||
@ -733,7 +734,7 @@ int LammpsInterface::num_atoms_per_cell(void) const
|
||||
}
|
||||
|
||||
//* gets tributary volume for an atom
|
||||
double LammpsInterface::volume_per_atom(void) const
|
||||
double LammpsInterface::volume_per_atom() const
|
||||
{
|
||||
double naCell = num_atoms_per_cell();
|
||||
double volPerAtom =
|
||||
@ -1322,61 +1323,73 @@ int** LammpsInterface::bond_list() const { return lammps_->neighbor->bondlist;
|
||||
|
||||
char * LammpsInterface::region_name(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->id;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->id;
|
||||
}
|
||||
|
||||
char * LammpsInterface::region_style(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->style;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->style;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_xlo(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->extent_xlo;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->extent_xlo;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_xhi(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->extent_xhi;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->extent_xhi;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_ylo(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->extent_ylo;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->extent_ylo;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_yhi(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->extent_yhi;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->extent_yhi;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_zlo(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->extent_zlo;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->extent_zlo;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_zhi(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->extent_zhi;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->extent_zhi;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_xscale(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->xscale;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->xscale;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_yscale(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->yscale;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->yscale;
|
||||
}
|
||||
|
||||
double LammpsInterface::region_zscale(int iRegion) const
|
||||
{
|
||||
return lammps_->domain->regions[iRegion]->zscale;
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->zscale;
|
||||
}
|
||||
|
||||
int LammpsInterface::region_match(int iRegion, double x, double y, double z) const {
|
||||
return lammps_->domain->regions[iRegion]->match(x,y,z);
|
||||
auto regions = lammps_->domain->get_region_list();
|
||||
return regions[iRegion]->match(x,y,z);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
@ -1468,7 +1481,7 @@ LAMMPS_NS::Compute * LammpsInterface::const_to_active(COMPUTE_POINTER computePoi
|
||||
// compute pe/atom interface methods
|
||||
// - the only compute "owned" by ATC
|
||||
// -----------------------------------------------------------------
|
||||
int LammpsInterface::create_compute_pe_peratom(void) const
|
||||
int LammpsInterface::create_compute_pe_peratom() const
|
||||
{
|
||||
char **list = new char*[4];
|
||||
string atomPeName = compute_pe_name();
|
||||
@ -1493,7 +1506,7 @@ int LammpsInterface::create_compute_pe_peratom(void) const
|
||||
return icompute;
|
||||
}
|
||||
|
||||
double * LammpsInterface::compute_pe_peratom(void) const
|
||||
double * LammpsInterface::compute_pe_peratom() const
|
||||
{
|
||||
if (atomPE_) {
|
||||
atomPE_->compute_peratom();
|
||||
|
||||
@ -84,7 +84,7 @@ LinearSolver::LinearSolver(
|
||||
// --------------------------------------------------------------------
|
||||
// Setup
|
||||
// --------------------------------------------------------------------
|
||||
void LinearSolver::setup(void)
|
||||
void LinearSolver::setup()
|
||||
{
|
||||
tol_ = kTol;
|
||||
nVariables_ = matrix_.nRows();
|
||||
@ -113,7 +113,7 @@ void LinearSolver::setup(void)
|
||||
// --------------------------------------------------------------------
|
||||
// allow_reinitialization
|
||||
// --------------------------------------------------------------------
|
||||
void LinearSolver::allow_reinitialization(void)
|
||||
void LinearSolver::allow_reinitialization()
|
||||
{
|
||||
if (constraintHandlerType_ == PENALIZE_CONSTRAINTS) {
|
||||
if (matrixModified_ ) throw ATC_Error("LinearSolver: can't allow reinitialization after matrix has been modified");
|
||||
@ -157,7 +157,7 @@ void LinearSolver::initialize(const BC_SET * bcs)
|
||||
// --------------------------------------------------------------------
|
||||
// initialize_matrix
|
||||
// --------------------------------------------------------------------
|
||||
void LinearSolver::initialize_matrix(void)
|
||||
void LinearSolver::initialize_matrix()
|
||||
{
|
||||
if ( initializedMatrix_ ) return;
|
||||
if (constraintHandlerType_ == PENALIZE_CONSTRAINTS) {
|
||||
@ -172,7 +172,7 @@ void LinearSolver::initialize_matrix(void)
|
||||
// --------------------------------------------------------------------
|
||||
// initialize_inverse
|
||||
// --------------------------------------------------------------------
|
||||
void LinearSolver::initialize_inverse(void)
|
||||
void LinearSolver::initialize_inverse()
|
||||
{
|
||||
if ( initializedInverse_ ) return;
|
||||
if (solverType_ == ITERATIVE_SOLVE_SYMMETRIC
|
||||
@ -196,7 +196,7 @@ void LinearSolver::initialize_inverse(void)
|
||||
// --------------------------------------------------------------------
|
||||
// initialize_rhs
|
||||
// --------------------------------------------------------------------
|
||||
void LinearSolver::initialize_rhs(void)
|
||||
void LinearSolver::initialize_rhs()
|
||||
{
|
||||
if (! rhs_ ) return;
|
||||
if (! bcs_ ) {
|
||||
@ -215,7 +215,7 @@ void LinearSolver::initialize_rhs(void)
|
||||
// add matrix penalty
|
||||
// - change matrix for Dirichlet conditions: add penalty
|
||||
// --------------------------------------------------------------------
|
||||
void LinearSolver::add_matrix_penalty(void)
|
||||
void LinearSolver::add_matrix_penalty()
|
||||
{
|
||||
penalty_ = kPenalty; // relative to matrix diagonal
|
||||
SPAR_MAT & A = matrixCopy_;
|
||||
@ -233,7 +233,7 @@ void LinearSolver::add_matrix_penalty(void)
|
||||
// partition matrix
|
||||
// - partition matrix based on Dirichlet constraints
|
||||
// --------------------------------------------------------------------
|
||||
void LinearSolver::partition_matrix(void)
|
||||
void LinearSolver::partition_matrix()
|
||||
{
|
||||
fixedSet_.clear();
|
||||
BC_SET::const_iterator itr;
|
||||
|
||||
@ -172,7 +172,7 @@ void OutputManager::read_restart_file(string fileName, RESTART_LIST *data)
|
||||
//-----------------------------------------------------------------------------
|
||||
//*
|
||||
//-----------------------------------------------------------------------------
|
||||
void OutputManager::write_globals(void)
|
||||
void OutputManager::write_globals()
|
||||
{
|
||||
if ( outputPrefix_ == "NULL") return;
|
||||
string file = outputPrefix_ + ".GLOBALS";
|
||||
@ -224,7 +224,7 @@ void OutputManager::write_geometry(const MATRIX *coordinates,
|
||||
//-----------------------------------------------------------------------------
|
||||
//*
|
||||
//-----------------------------------------------------------------------------
|
||||
void OutputManager::write_geometry_ensight(void)
|
||||
void OutputManager::write_geometry_ensight()
|
||||
{
|
||||
// geometry based on a reference configuration
|
||||
string geom_file_name = outputPrefix_ + ".geo";
|
||||
@ -327,7 +327,7 @@ void OutputManager::write_geometry_ensight(void)
|
||||
//-----------------------------------------------------------------------------
|
||||
//*
|
||||
//-----------------------------------------------------------------------------
|
||||
void OutputManager::write_geometry_text(void)
|
||||
void OutputManager::write_geometry_text()
|
||||
{
|
||||
if ( outputPrefix_ == "NULL") return;
|
||||
// geometry based on a reference configuration
|
||||
|
||||
@ -18,7 +18,7 @@ PairMap::PairMap(LammpsInterface * lammpsInterface, int groupbit ):
|
||||
nPairs_(0), nBonds_(0)
|
||||
{
|
||||
};
|
||||
PairMap::~PairMap(void)
|
||||
PairMap::~PairMap()
|
||||
{
|
||||
};
|
||||
//==========================================================
|
||||
@ -27,7 +27,7 @@ PairMapNeighbor::PairMapNeighbor(LammpsInterface * lammpsInterface, int groupbit
|
||||
{
|
||||
};
|
||||
|
||||
void PairMapNeighbor::reset(void) const
|
||||
void PairMapNeighbor::reset() const
|
||||
{
|
||||
int inum = lammpsInterface_->neighbor_list_inum();
|
||||
int *ilist = lammpsInterface_->neighbor_list_ilist();
|
||||
@ -90,7 +90,7 @@ PairVirialEulerian::PairVirialEulerian(LammpsInterface * lammpsInterface,
|
||||
};
|
||||
|
||||
|
||||
void PairVirialEulerian::reset(void) const
|
||||
void PairVirialEulerian::reset() const
|
||||
{
|
||||
int nPairs = pairMap_.size();
|
||||
quantity_.reset(nPairs,nCols_);
|
||||
@ -129,7 +129,7 @@ PairVirialLagrangian::PairVirialLagrangian(LammpsInterface * lammpsInterface,
|
||||
};
|
||||
|
||||
|
||||
void PairVirialLagrangian::reset(void) const
|
||||
void PairVirialLagrangian::reset() const
|
||||
{
|
||||
int nPairs = pairMap_.size();
|
||||
quantity_.reset(nPairs,nCols_);
|
||||
@ -181,7 +181,7 @@ PairPotentialHeatFluxEulerian::PairPotentialHeatFluxEulerian(LammpsInterface * l
|
||||
|
||||
};
|
||||
|
||||
void PairPotentialHeatFluxEulerian::reset(void) const
|
||||
void PairPotentialHeatFluxEulerian::reset() const
|
||||
{
|
||||
int nPairs = pairMap_.size();
|
||||
quantity_.reset(nPairs,nCols_);
|
||||
@ -217,7 +217,7 @@ PairPotentialHeatFluxLagrangian::PairPotentialHeatFluxLagrangian(LammpsInterface
|
||||
|
||||
};
|
||||
|
||||
void PairPotentialHeatFluxLagrangian::reset(void) const
|
||||
void PairPotentialHeatFluxLagrangian::reset() const
|
||||
{
|
||||
int nPairs = pairMap_.size();
|
||||
quantity_.reset(nPairs,nCols_);
|
||||
@ -275,7 +275,7 @@ BondMatrixKernel::BondMatrixKernel(LammpsInterface * lammpsInterface,
|
||||
if (kernelFunction_ == nullptr)
|
||||
throw ATC_Error("No AtC kernel function initialized");
|
||||
};
|
||||
void BondMatrixKernel::reset(void) const
|
||||
void BondMatrixKernel::reset() const
|
||||
{
|
||||
int nPairs = pairMap_.size(); // needs to come after quantity for reset
|
||||
int nNodes = feMesh_->num_nodes_unique();
|
||||
@ -328,7 +328,7 @@ BondMatrixPartitionOfUnity::BondMatrixPartitionOfUnity(LammpsInterface * lammpsI
|
||||
lineWg_[i] *= 0.5;
|
||||
}
|
||||
};
|
||||
void BondMatrixPartitionOfUnity::reset(void) const
|
||||
void BondMatrixPartitionOfUnity::reset() const
|
||||
{
|
||||
int nNodes = feMesh_->num_nodes_unique();
|
||||
int nPairs = pairMap_.size();
|
||||
|
||||
@ -108,7 +108,7 @@ void PhysicsModel::parse_material_file(string fileName)
|
||||
fileId.close();
|
||||
}
|
||||
|
||||
void PhysicsModel::initialize(void)
|
||||
void PhysicsModel::initialize()
|
||||
{
|
||||
// initialize materials
|
||||
vector< Material* >::const_iterator iter;
|
||||
|
||||
@ -111,7 +111,7 @@ PoissonSolver::~PoissonSolver()
|
||||
// --------------------------------------------------------------------
|
||||
// Initialize
|
||||
// --------------------------------------------------------------------
|
||||
void PoissonSolver::initialize(void)
|
||||
void PoissonSolver::initialize()
|
||||
{
|
||||
nNodes_ = feEngine_->num_nodes();
|
||||
|
||||
|
||||
@ -523,7 +523,7 @@ namespace ATC {
|
||||
// print
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
void PrescribedDataManager::print(void)
|
||||
void PrescribedDataManager::print()
|
||||
{
|
||||
// print and check consistency
|
||||
enum dataType {FREE=0,FIELD,SOURCE};
|
||||
|
||||
@ -735,7 +735,7 @@ double fermi_dirac(const double E, const double T)
|
||||
else
|
||||
ATC_Error("schrodinger-poisson solver:too many fixed");
|
||||
}
|
||||
GlobalSliceSchrodingerPoissonSolver::~GlobalSliceSchrodingerPoissonSolver(void) {
|
||||
GlobalSliceSchrodingerPoissonSolver::~GlobalSliceSchrodingerPoissonSolver() {
|
||||
if (solver_) delete solver_;
|
||||
}
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
@ -237,7 +237,7 @@ StressCubicElastic::StressCubicElastic(fstream &fileId)
|
||||
}
|
||||
}
|
||||
|
||||
void StressCubicElastic::set_tangent(void)
|
||||
void StressCubicElastic::set_tangent()
|
||||
{
|
||||
C_.reset(6,6);
|
||||
C_(0,0)=C_(1,1)=C_(2,2) =c11_;
|
||||
@ -374,7 +374,7 @@ StressCauchyBorn::~StressCauchyBorn()
|
||||
//==============================================================================
|
||||
// initialize
|
||||
//==============================================================================
|
||||
void StressCauchyBorn::initialize(void)
|
||||
void StressCauchyBorn::initialize()
|
||||
{
|
||||
if (!initialized_) {
|
||||
if (makeLinear_) linearize();
|
||||
@ -393,7 +393,7 @@ void StressCauchyBorn::initialize(void)
|
||||
//==============================================================================
|
||||
// compute the bond stiffness consistent with the einstein freq
|
||||
//==============================================================================
|
||||
double StressCauchyBorn::stiffness(void) const
|
||||
double StressCauchyBorn::stiffness() const
|
||||
{
|
||||
AtomCluster vac;
|
||||
cblattice_->atom_cluster(eye<double>(3,3), potential_->cutoff_radius(), vac);
|
||||
|
||||
@ -19,7 +19,7 @@ WeakEquationChargeDiffusion::WeakEquationChargeDiffusion()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//--------------------------------------------------------------
|
||||
WeakEquationChargeDiffusion::~WeakEquationChargeDiffusion(void)
|
||||
WeakEquationChargeDiffusion::~WeakEquationChargeDiffusion()
|
||||
{}
|
||||
//---------------------------------------------------------------------
|
||||
// compute capacity
|
||||
|
||||
@ -19,7 +19,7 @@ WeakEquationDiffusion::WeakEquationDiffusion()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//--------------------------------------------------------------
|
||||
WeakEquationDiffusion::~WeakEquationDiffusion(void)
|
||||
WeakEquationDiffusion::~WeakEquationDiffusion()
|
||||
{}
|
||||
//---------------------------------------------------------------------
|
||||
// compute capacity
|
||||
|
||||
@ -18,7 +18,7 @@ WeakEquationElectronContinuity::WeakEquationElectronContinuity()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//---------------------------------------------------------------------
|
||||
WeakEquationElectronContinuity::~WeakEquationElectronContinuity(void)
|
||||
WeakEquationElectronContinuity::~WeakEquationElectronContinuity()
|
||||
{}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
@ -66,7 +66,7 @@ WeakEquationElectronEquilibrium::WeakEquationElectronEquilibrium()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//---------------------------------------------------------------------
|
||||
WeakEquationElectronEquilibrium::~WeakEquationElectronEquilibrium(void)
|
||||
WeakEquationElectronEquilibrium::~WeakEquationElectronEquilibrium()
|
||||
{}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
|
||||
@ -18,7 +18,7 @@ WeakEquationElectronTemperature::WeakEquationElectronTemperature()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//---------------------------------------------------------------------
|
||||
WeakEquationElectronTemperature::~WeakEquationElectronTemperature(void)
|
||||
WeakEquationElectronTemperature::~WeakEquationElectronTemperature()
|
||||
{}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
@ -93,7 +93,7 @@ WeakEquationElectronTemperatureJouleHeating::WeakEquationElectronTemperatureJoul
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//---------------------------------------------------------------------
|
||||
WeakEquationElectronTemperatureJouleHeating::~WeakEquationElectronTemperatureJouleHeating(void)
|
||||
WeakEquationElectronTemperatureJouleHeating::~WeakEquationElectronTemperatureJouleHeating()
|
||||
{}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
@ -162,7 +162,7 @@ WeakEquationElectronTemperatureConvection::WeakEquationElectronTemperatureConvec
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//---------------------------------------------------------------------
|
||||
WeakEquationElectronTemperatureConvection::~WeakEquationElectronTemperatureConvection(void)
|
||||
WeakEquationElectronTemperatureConvection::~WeakEquationElectronTemperatureConvection()
|
||||
{
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@ -19,7 +19,7 @@ WeakEquationMassDiffusion::WeakEquationMassDiffusion()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//--------------------------------------------------------------
|
||||
WeakEquationMassDiffusion::~WeakEquationMassDiffusion(void)
|
||||
WeakEquationMassDiffusion::~WeakEquationMassDiffusion()
|
||||
{}
|
||||
//---------------------------------------------------------------------
|
||||
// compute capacity
|
||||
|
||||
@ -19,7 +19,7 @@ WeakEquationPhononTemperature::WeakEquationPhononTemperature()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//--------------------------------------------------------------
|
||||
WeakEquationPhononTemperature::~WeakEquationPhononTemperature(void)
|
||||
WeakEquationPhononTemperature::~WeakEquationPhononTemperature()
|
||||
{}
|
||||
//---------------------------------------------------------------------
|
||||
// compute total energy
|
||||
@ -67,7 +67,7 @@ WeakEquationPhononTemperatureExchange::WeakEquationPhononTemperatureExchange()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//---------------------------------------------------------------------
|
||||
WeakEquationPhononTemperatureExchange::~WeakEquationPhononTemperatureExchange(void)
|
||||
WeakEquationPhononTemperatureExchange::~WeakEquationPhononTemperatureExchange()
|
||||
{}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
|
||||
@ -18,7 +18,7 @@ WeakEquationSchrodinger::WeakEquationSchrodinger()
|
||||
//--------------------------------------------------------------
|
||||
// Destructor
|
||||
//---------------------------------------------------------------------
|
||||
WeakEquationSchrodinger::~WeakEquationSchrodinger(void)
|
||||
WeakEquationSchrodinger::~WeakEquationSchrodinger()
|
||||
{}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
|
||||
@ -13,14 +13,6 @@ endif
|
||||
|
||||
NVCC = nvcc
|
||||
|
||||
# obsolete hardware. not supported by current drivers anymore.
|
||||
#CUDA_ARCH = -arch=sm_13
|
||||
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
||||
|
||||
# Fermi hardware
|
||||
#CUDA_ARCH = -arch=sm_20
|
||||
#CUDA_ARCH = -arch=sm_21
|
||||
|
||||
# Kepler hardware
|
||||
#CUDA_ARCH = -arch=sm_30
|
||||
#CUDA_ARCH = -arch=sm_32
|
||||
@ -45,6 +37,9 @@ CUDA_ARCH = -arch=sm_50
|
||||
#CUDA_ARCH = -arch=sm_80
|
||||
#CUDA_ARCH = -arch=sm_86
|
||||
|
||||
# Hopper hardware
|
||||
#CUDA_ARCH = -arch=sm_90
|
||||
|
||||
CUDA_CODE = -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] \
|
||||
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \
|
||||
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_75,code=[sm_75,compute_75] \
|
||||
|
||||
@ -171,7 +171,13 @@ NOTE: when compiling with CMake, all of the considerations listed below
|
||||
are considered within the CMake configuration process, so no separate
|
||||
compilation of the gpu library is required. Also this will build in support
|
||||
for all compute architectures that are supported by the CUDA toolkit version
|
||||
used to build the gpu library.
|
||||
used to build the gpu library. A similar setup is possible using
|
||||
Makefile.linux_multi after adjusting the settings for the CUDA toolkit in use.
|
||||
|
||||
Only CUDA toolkit version 8.0 and later and only GPU architecture 3.0
|
||||
(aka Kepler) and later are supported by this version of LAMMPS. If you want
|
||||
to use older hardware or software, you have to compile for OpenCL or use an older
|
||||
version of LAMMPS.
|
||||
|
||||
If you do not want to use a fat binary that supports multiple CUDA
|
||||
architectures, the CUDA_ARCH must be set to match the GPU architecture. This
|
||||
@ -225,7 +231,8 @@ If GERYON_NUMA_FISSION is defined at build time, LAMMPS will consider separate
|
||||
NUMA nodes on GPUs or accelerators as separate devices. For example, a 2-socket
|
||||
CPU would appear as two separate devices for OpenCL (and LAMMPS would require
|
||||
two MPI processes to use both sockets with the GPU library - each with its
|
||||
own device ID as output by ocl_get_devices).
|
||||
own device ID as output by ocl_get_devices). OpenCL version 1.2 or later is
|
||||
required.
|
||||
|
||||
For a debug build, use "-DUCL_DEBUG -DGERYON_KERNEL_DUMP" and remove
|
||||
"-DUCL_NO_EXIT" and "-DMPI_GERYON" from the build options.
|
||||
|
||||
@ -379,18 +379,9 @@ UCL_Device::UCL_Device() {
|
||||
prop.regsPerBlock = hip_prop.regsPerBlock;
|
||||
prop.clockRate = hip_prop.clockRate;
|
||||
prop.computeMode = hip_prop.computeMode;
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
|
||||
|
||||
//#if CUDA_VERSION >= 2020
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev));
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
|
||||
//#endif
|
||||
//#if CUDA_VERSION >= 3010
|
||||
prop.concurrentKernels = hip_prop.concurrentKernels;
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
|
||||
//#endif
|
||||
|
||||
_properties.push_back(prop);
|
||||
}
|
||||
@ -447,13 +438,11 @@ void UCL_Device::clear() {
|
||||
|
||||
// List all devices along with all properties
|
||||
void UCL_Device::print_all(std::ostream &out) {
|
||||
//#if CUDA_VERSION >= 2020
|
||||
int driver_version;
|
||||
hipDriverGetVersion(&driver_version);
|
||||
out << "Driver Version: "
|
||||
<< driver_version/1000 << "." << driver_version%100
|
||||
<< std::endl;
|
||||
//#endif
|
||||
|
||||
if (num_devices() == 0)
|
||||
out << "There is no device supporting HIP\n";
|
||||
@ -470,12 +459,10 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
out << "No\n";
|
||||
out << " Total amount of global memory: "
|
||||
<< gigabytes(i) << " GB\n";
|
||||
//#if CUDA_VERSION >= 2000
|
||||
out << " Number of compute units/multiprocessors: "
|
||||
<< _properties[i].multiProcessorCount << std::endl;
|
||||
out << " Number of cores: "
|
||||
<< cores(i) << std::endl;
|
||||
//#endif
|
||||
out << " Total amount of constant memory: "
|
||||
<< _properties[i].totalConstantMemory << " bytes\n";
|
||||
out << " Total amount of local/shared memory per block: "
|
||||
@ -494,58 +481,29 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
<< _properties[i].maxGridSize[0] << " x "
|
||||
<< _properties[i].maxGridSize[1] << " x "
|
||||
<< _properties[i].maxGridSize[2] << std::endl;
|
||||
//out << " Maximum memory pitch: "
|
||||
// << max_pitch(i) << " bytes\n";
|
||||
//out << " Texture alignment: "
|
||||
// << _properties[i].textureAlign << " bytes\n";
|
||||
out << " Clock rate: "
|
||||
<< clock_rate(i) << " GHz\n";
|
||||
//#if CUDA_VERSION >= 2020
|
||||
//out << " Run time limit on kernels: ";
|
||||
//if (_properties[i].kernelExecTimeoutEnabled)
|
||||
// out << "Yes\n";
|
||||
//else
|
||||
// out << "No\n";
|
||||
out << " Integrated: ";
|
||||
if (_properties[i].integrated)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
//out << " Support host page-locked memory mapping: ";
|
||||
//if (_properties[i].canMapHostMemory)
|
||||
// out << "Yes\n";
|
||||
//else
|
||||
// out << "No\n";
|
||||
out << " Compute mode: ";
|
||||
if (_properties[i].computeMode == hipComputeModeDefault)
|
||||
out << "Default\n"; // multiple threads can use device
|
||||
//#if CUDA_VERSION >= 8000
|
||||
// else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
|
||||
//#else
|
||||
else if (_properties[i].computeMode == hipComputeModeExclusive)
|
||||
//#endif
|
||||
out << "Exclusive\n"; // only thread can use device
|
||||
else if (_properties[i].computeMode == hipComputeModeProhibited)
|
||||
out << "Prohibited\n"; // no thread can use device
|
||||
//#if CUDART_VERSION >= 4000
|
||||
else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
|
||||
out << "Exclusive Process\n"; // multiple threads 1 process
|
||||
//#endif
|
||||
else
|
||||
out << "Unknown\n";
|
||||
//#endif
|
||||
//#if CUDA_VERSION >= 3010
|
||||
out << " Concurrent kernel execution: ";
|
||||
if (_properties[i].concurrentKernels)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
//out << " Device has ECC support enabled: ";
|
||||
//if (_properties[i].ECCEnabled)
|
||||
// out << "Yes\n";
|
||||
//else
|
||||
// out << "No\n";
|
||||
//#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -5,11 +5,7 @@
|
||||
#include <cassert>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
//#if CUDA_VERSION >= 3020
|
||||
#define CUDA_INT_TYPE size_t
|
||||
//#else
|
||||
//#define CUDA_INT_TYPE unsigned
|
||||
//#endif
|
||||
|
||||
#ifdef MPI_GERYON
|
||||
#include "mpi.h"
|
||||
|
||||
@ -71,9 +71,6 @@ class UCL_Texture {
|
||||
|
||||
/// Make a texture reference available to kernel
|
||||
inline void allow(UCL_Kernel &) {
|
||||
//#if CUDA_VERSION < 4000
|
||||
//CU_SAFE_CALL(cuParamSetTexRef(kernel._kernel, CU_PARAM_TR_DEFAULT, _tex));
|
||||
//#endif
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@ -320,6 +320,9 @@ class UCL_Device {
|
||||
|
||||
// Grabs the properties for all devices
|
||||
UCL_Device::UCL_Device() {
|
||||
#if CUDA_VERSION < 8000
|
||||
#error CUDA Toolkit version 8 or later required
|
||||
#endif
|
||||
CU_SAFE_CALL_NS(cuInit(0));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetCount(&_num_devices));
|
||||
for (int i=0; i<_num_devices; ++i) {
|
||||
@ -358,16 +361,12 @@ UCL_Device::UCL_Device() {
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
|
||||
|
||||
#if CUDA_VERSION >= 2020
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
|
||||
#endif
|
||||
#if CUDA_VERSION >= 3010
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.concurrentKernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
|
||||
#endif
|
||||
|
||||
_properties.push_back(prop);
|
||||
}
|
||||
@ -415,13 +414,10 @@ void UCL_Device::clear() {
|
||||
|
||||
// List all devices along with all properties
|
||||
void UCL_Device::print_all(std::ostream &out) {
|
||||
#if CUDA_VERSION >= 2020
|
||||
int driver_version;
|
||||
cuDriverGetVersion(&driver_version);
|
||||
out << "CUDA Driver Version: "
|
||||
<< driver_version/1000 << "." << driver_version%100
|
||||
<< std::endl;
|
||||
#endif
|
||||
<< driver_version/1000 << "." << driver_version%100 << std::endl;
|
||||
|
||||
if (num_devices() == 0)
|
||||
out << "There is no device supporting CUDA\n";
|
||||
@ -438,12 +434,10 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
out << "No\n";
|
||||
out << " Total amount of global memory: "
|
||||
<< gigabytes(i) << " GB\n";
|
||||
#if CUDA_VERSION >= 2000
|
||||
out << " Number of compute units/multiprocessors: "
|
||||
<< _properties[i].multiProcessorCount << std::endl;
|
||||
out << " Number of cores: "
|
||||
<< cores(i) << std::endl;
|
||||
#endif
|
||||
out << " Total amount of constant memory: "
|
||||
<< _properties[i].totalConstantMemory << " bytes\n";
|
||||
out << " Total amount of local/shared memory per block: "
|
||||
@ -468,7 +462,6 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
<< _properties[i].textureAlign << " bytes\n";
|
||||
out << " Clock rate: "
|
||||
<< clock_rate(i) << " GHz\n";
|
||||
#if CUDA_VERSION >= 2020
|
||||
out << " Run time limit on kernels: ";
|
||||
if (_properties[i].kernelExecTimeoutEnabled)
|
||||
out << "Yes\n";
|
||||
@ -487,22 +480,14 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
out << " Compute mode: ";
|
||||
if (_properties[i].computeMode == CU_COMPUTEMODE_DEFAULT)
|
||||
out << "Default\n"; // multiple threads can use device
|
||||
#if CUDA_VERSION >= 8000
|
||||
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
|
||||
#else
|
||||
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE)
|
||||
#endif
|
||||
out << "Exclusive\n"; // only thread can use device
|
||||
else if (_properties[i].computeMode == CU_COMPUTEMODE_PROHIBITED)
|
||||
out << "Prohibited\n"; // no thread can use device
|
||||
#if CUDART_VERSION >= 4000
|
||||
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
|
||||
out << "Exclusive Process\n"; // multiple threads 1 process
|
||||
#endif
|
||||
else
|
||||
out << "Unknown\n";
|
||||
#endif
|
||||
#if CUDA_VERSION >= 3010
|
||||
out << " Concurrent kernel execution: ";
|
||||
if (_properties[i].concurrentKernels)
|
||||
out << "Yes\n";
|
||||
@ -513,7 +498,6 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -165,17 +165,11 @@ class UCL_Program {
|
||||
class UCL_Kernel {
|
||||
public:
|
||||
UCL_Kernel() : _dimensions(1), _num_args(0) {
|
||||
#if CUDA_VERSION < 4000
|
||||
_param_size=0;
|
||||
#endif
|
||||
_num_blocks[0]=0;
|
||||
}
|
||||
|
||||
UCL_Kernel(UCL_Program &program, const char *function) :
|
||||
_dimensions(1), _num_args(0) {
|
||||
#if CUDA_VERSION < 4000
|
||||
_param_size=0;
|
||||
#endif
|
||||
_num_blocks[0]=0;
|
||||
set_function(program,function);
|
||||
_cq=program._cq;
|
||||
@ -211,11 +205,7 @@ class UCL_Kernel {
|
||||
if (index==_num_args)
|
||||
add_arg(arg);
|
||||
else if (index<_num_args)
|
||||
#if CUDA_VERSION >= 4000
|
||||
_kernel_args[index]=arg;
|
||||
#else
|
||||
CU_SAFE_CALL(cuParamSetv(_kernel, _offsets[index], arg, sizeof(dtype)));
|
||||
#endif
|
||||
else
|
||||
assert(0==1); // Must add kernel parameters in sequential order
|
||||
}
|
||||
@ -242,15 +232,7 @@ class UCL_Kernel {
|
||||
|
||||
/// Add a kernel argument.
|
||||
inline void add_arg(const CUdeviceptr* const arg) {
|
||||
#if CUDA_VERSION >= 4000
|
||||
_kernel_args[_num_args]=(void *)arg;
|
||||
#else
|
||||
void* ptr = (void*)(size_t)(*arg);
|
||||
_param_size = (_param_size + __alignof(ptr) - 1) & ~(__alignof(ptr) - 1);
|
||||
CU_SAFE_CALL(cuParamSetv(_kernel, _param_size, &ptr, sizeof(ptr)));
|
||||
_offsets.push_back(_param_size);
|
||||
_param_size+=sizeof(ptr);
|
||||
#endif
|
||||
_num_args++;
|
||||
if (_num_args>UCL_MAX_KERNEL_ARGS) assert(0==1);
|
||||
}
|
||||
@ -258,14 +240,7 @@ class UCL_Kernel {
|
||||
/// Add a kernel argument.
|
||||
template <class dtype>
|
||||
inline void add_arg(const dtype* const arg) {
|
||||
#if CUDA_VERSION >= 4000
|
||||
_kernel_args[_num_args]=const_cast<dtype *>(arg);
|
||||
#else
|
||||
_param_size = (_param_size+__alignof(dtype)-1) & ~(__alignof(dtype)-1);
|
||||
CU_SAFE_CALL(cuParamSetv(_kernel,_param_size,(void*)arg,sizeof(dtype)));
|
||||
_offsets.push_back(_param_size);
|
||||
_param_size+=sizeof(dtype);
|
||||
#endif
|
||||
_num_args++;
|
||||
if (_num_args>UCL_MAX_KERNEL_ARGS) assert(0==1);
|
||||
}
|
||||
@ -298,13 +273,9 @@ class UCL_Kernel {
|
||||
_num_blocks[0]=num_blocks;
|
||||
_num_blocks[1]=1;
|
||||
_num_blocks[2]=1;
|
||||
#if CUDA_VERSION >= 4000
|
||||
_block_size[0]=block_size;
|
||||
_block_size[1]=1;
|
||||
_block_size[2]=1;
|
||||
#else
|
||||
CU_SAFE_CALL(cuFuncSetBlockShape(_kernel,block_size,1,1));
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
@ -323,13 +294,9 @@ class UCL_Kernel {
|
||||
_num_blocks[0]=num_blocks_x;
|
||||
_num_blocks[1]=num_blocks_y;
|
||||
_num_blocks[2]=1;
|
||||
#if CUDA_VERSION >= 4000
|
||||
_block_size[0]=block_size_x;
|
||||
_block_size[1]=block_size_y;
|
||||
_block_size[2]=1;
|
||||
#else
|
||||
CU_SAFE_CALL(cuFuncSetBlockShape(_kernel,block_size_x,block_size_y,1));
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
@ -350,14 +317,9 @@ class UCL_Kernel {
|
||||
_num_blocks[0]=num_blocks_x;
|
||||
_num_blocks[1]=num_blocks_y;
|
||||
_num_blocks[2]=1;
|
||||
#if CUDA_VERSION >= 4000
|
||||
_block_size[0]=block_size_x;
|
||||
_block_size[1]=block_size_y;
|
||||
_block_size[2]=block_size_z;
|
||||
#else
|
||||
CU_SAFE_CALL(cuFuncSetBlockShape(_kernel,block_size_x,block_size_y,
|
||||
block_size_z));
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
@ -373,23 +335,14 @@ class UCL_Kernel {
|
||||
|
||||
/// Run the kernel in the default command queue
|
||||
inline void run() {
|
||||
#if CUDA_VERSION >= 4000
|
||||
CU_SAFE_CALL(cuLaunchKernel(_kernel,_num_blocks[0],_num_blocks[1],
|
||||
_num_blocks[2],_block_size[0],_block_size[1],
|
||||
_block_size[2],0,_cq,_kernel_args,nullptr));
|
||||
#else
|
||||
CU_SAFE_CALL(cuParamSetSize(_kernel,_param_size));
|
||||
CU_SAFE_CALL(cuLaunchGridAsync(_kernel,_num_blocks[0],_num_blocks[1],_cq));
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Clear any arguments associated with the kernel
|
||||
inline void clear_args() {
|
||||
_num_args=0;
|
||||
#if CUDA_VERSION < 4000
|
||||
_offsets.clear();
|
||||
_param_size=0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Return the default command queue/stream associated with this data
|
||||
@ -406,13 +359,8 @@ class UCL_Kernel {
|
||||
unsigned _num_args;
|
||||
friend class UCL_Texture;
|
||||
|
||||
#if CUDA_VERSION >= 4000
|
||||
unsigned _block_size[3];
|
||||
void * _kernel_args[UCL_MAX_KERNEL_ARGS];
|
||||
#else
|
||||
std::vector<unsigned> _offsets;
|
||||
unsigned _param_size;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -5,11 +5,7 @@
|
||||
#include <cassert>
|
||||
#include <cuda.h>
|
||||
|
||||
#if CUDA_VERSION >= 3020
|
||||
#define CUDA_INT_TYPE size_t
|
||||
#else
|
||||
#define CUDA_INT_TYPE unsigned
|
||||
#endif
|
||||
|
||||
#ifdef MPI_GERYON
|
||||
#include "mpi.h"
|
||||
|
||||
@ -69,9 +69,6 @@ class UCL_Texture {
|
||||
|
||||
/// Make a texture reference available to kernel
|
||||
inline void allow(UCL_Kernel &kernel) {
|
||||
#if CUDA_VERSION < 4000
|
||||
CU_SAFE_CALL(cuParamSetTexRef(kernel._kernel, CU_PARAM_TR_DEFAULT, _tex));
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@ -41,7 +41,7 @@ using namespace ucl_cudart;
|
||||
using namespace ucl_hip;
|
||||
#endif
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int main(int /*argc*/, char** /*argv*/) {
|
||||
UCL_Device cop;
|
||||
std::cout << "Found " << cop.num_platforms() << " platform(s).\n";
|
||||
if (cop.num_platforms()>0)
|
||||
|
||||
@ -25,21 +25,8 @@
|
||||
#ifndef UCL_NV_KERNEL_H
|
||||
#define UCL_NV_KERNEL_H
|
||||
|
||||
#if (__CUDA_ARCH__ < 200)
|
||||
#define mul24 __mul24
|
||||
#define MEM_THREADS 16
|
||||
#else
|
||||
#define mul24(X,Y) (X)*(Y)
|
||||
#define MEM_THREADS 32
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_PRE_THREE
|
||||
struct __builtin_align__(16) _double4
|
||||
{
|
||||
double x, y, z, w;
|
||||
};
|
||||
typedef struct _double4 double4;
|
||||
#endif
|
||||
|
||||
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
||||
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
||||
|
||||
@ -94,13 +94,13 @@ bool AnswerT::init(const int inum, const bool charge, const bool rot,
|
||||
template <class numtyp, class acctyp>
|
||||
bool AnswerT::add_fields(const bool charge, const bool rot) {
|
||||
bool realloc=false;
|
||||
if (charge && _charge==false) {
|
||||
if (charge && !_charge) {
|
||||
_charge=true;
|
||||
_e_fields++;
|
||||
_ev_fields++;
|
||||
realloc=true;
|
||||
}
|
||||
if (rot && _rot==false) {
|
||||
if (rot && !_rot) {
|
||||
_rot=true;
|
||||
realloc=true;
|
||||
}
|
||||
@ -163,10 +163,8 @@ void AnswerT::copy_answers(const bool eflag, const bool vflag,
|
||||
#endif
|
||||
|
||||
int csize=_ev_fields;
|
||||
if (!eflag)
|
||||
csize-=_e_fields;
|
||||
if (!vflag)
|
||||
csize-=6;
|
||||
if (!eflag) csize-=_e_fields;
|
||||
if (!vflag) csize-=6;
|
||||
|
||||
if (csize>0)
|
||||
engv.update_host(_ev_stride*csize,true);
|
||||
@ -192,8 +190,7 @@ void AnswerT::copy_answers(const bool eflag, const bool vflag,
|
||||
template <class numtyp, class acctyp>
|
||||
double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
double *virial) {
|
||||
if (_eflag==false && _vflag==false)
|
||||
return 0.0;
|
||||
if (!_eflag && !_vflag) return 0.0;
|
||||
|
||||
double evdwl=0.0;
|
||||
int vstart=0;
|
||||
@ -241,11 +238,9 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
template <class numtyp, class acctyp>
|
||||
double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
double *virial, double &ecoul) {
|
||||
if (_eflag==false && _vflag==false)
|
||||
return 0.0;
|
||||
if (!_eflag && !_vflag) return 0.0;
|
||||
|
||||
if (_charge==false)
|
||||
return energy_virial(eatom,vatom,virial);
|
||||
if (!_charge) return energy_virial(eatom,vatom,virial);
|
||||
|
||||
double evdwl=0.0;
|
||||
int vstart=0, iend=_ev_stride;
|
||||
@ -305,8 +300,8 @@ void AnswerT::get_answers(double **f, double **tor) {
|
||||
if (_ilist==nullptr) {
|
||||
typedef struct { double x,y,z; } vec3d;
|
||||
typedef struct { acctyp x,y,z,w; } vec4d_t;
|
||||
vec3d *fp=reinterpret_cast<vec3d*>(&(f[0][0]));
|
||||
vec4d_t *forcep=reinterpret_cast<vec4d_t*>(&(force[0]));
|
||||
auto fp=reinterpret_cast<vec3d*>(&(f[0][0]));
|
||||
auto forcep=reinterpret_cast<vec4d_t*>(&(force[0]));
|
||||
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#pragma omp parallel
|
||||
@ -329,8 +324,8 @@ void AnswerT::get_answers(double **f, double **tor) {
|
||||
fp[i].z+=forcep[i].z;
|
||||
}
|
||||
if (_rot) {
|
||||
vec3d *torp=reinterpret_cast<vec3d*>(&(tor[0][0]));
|
||||
vec4d_t *torquep=reinterpret_cast<vec4d_t*>(&(force[_inum*4]));
|
||||
auto torp=reinterpret_cast<vec3d*>(&(tor[0][0]));
|
||||
auto torquep=reinterpret_cast<vec4d_t*>(&(force[_inum*4]));
|
||||
for (int i=ifrom; i<ito; i++) {
|
||||
torp[i].x+=torquep[i].x;
|
||||
torp[i].y+=torquep[i].y;
|
||||
|
||||
@ -107,17 +107,17 @@ bool AtomT::alloc(const int nall) {
|
||||
gpu_bytes+=x_cast.device.row_bytes()+type_cast.device.row_bytes();
|
||||
#endif
|
||||
|
||||
if (_charge && _host_view==false) {
|
||||
if (_charge && !_host_view) {
|
||||
success=success && (q.alloc(_max_atoms,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=q.device.row_bytes();
|
||||
}
|
||||
if (_rot && _host_view==false) {
|
||||
if (_rot && !_host_view) {
|
||||
success=success && (quat.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=quat.device.row_bytes();
|
||||
}
|
||||
if (_vel && _host_view==false) {
|
||||
if (_vel && !_host_view) {
|
||||
success=success && (v.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=v.device.row_bytes();
|
||||
@ -161,37 +161,37 @@ bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
// Ignore host/device transfers?
|
||||
int gpu_bytes=0;
|
||||
|
||||
if (charge && _charge==false) {
|
||||
if (charge && !_charge) {
|
||||
_charge=true;
|
||||
_other=true;
|
||||
if (_host_view==false) {
|
||||
if (!_host_view) {
|
||||
success=success && (q.alloc(_max_atoms,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=q.device.row_bytes();
|
||||
}
|
||||
}
|
||||
|
||||
if (rot && _rot==false) {
|
||||
if (rot && !_rot) {
|
||||
_rot=true;
|
||||
_other=true;
|
||||
if (_host_view==false) {
|
||||
if (!_host_view) {
|
||||
success=success && (quat.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=quat.device.row_bytes();
|
||||
}
|
||||
}
|
||||
|
||||
if (vel && _vel==false) {
|
||||
if (vel && !_vel) {
|
||||
_vel=true;
|
||||
_other=true;
|
||||
if (_host_view==false) {
|
||||
if (!_host_view) {
|
||||
success=success && (v.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=v.device.row_bytes();
|
||||
}
|
||||
}
|
||||
|
||||
if (bonds && _bonds==false) {
|
||||
if (bonds && !_bonds) {
|
||||
_bonds=true;
|
||||
if (_bonds && _gpu_nbor>0) {
|
||||
success=success && (dev_tag.alloc(_max_atoms,*dev,
|
||||
|
||||
@ -101,7 +101,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
||||
// Get the names of all nodes
|
||||
int name_length;
|
||||
char node_name[MPI_MAX_PROCESSOR_NAME];
|
||||
char *node_names = new char[MPI_MAX_PROCESSOR_NAME*_world_size];
|
||||
auto node_names = new char[MPI_MAX_PROCESSOR_NAME*_world_size];
|
||||
MPI_Get_processor_name(node_name,&name_length);
|
||||
MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names[0],
|
||||
MPI_MAX_PROCESSOR_NAME,MPI_CHAR,_comm_world);
|
||||
@ -201,9 +201,9 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
||||
unsigned best_cus = gpu->cus(0);
|
||||
bool type_match = (gpu->device_type(0) == type);
|
||||
for (int i = 1; i < gpu->num_devices(); i++) {
|
||||
if (type_match==true && gpu->device_type(i)!=type)
|
||||
if (type_match && gpu->device_type(i)!=type)
|
||||
continue;
|
||||
if (type_match == false && gpu->device_type(i) == type) {
|
||||
// No earlier device matched the requested type: adopt the first device that does.
// (The condition must be negated; the previous form was `type_match == false`.)
if (!type_match && gpu->device_type(i) == type) {
|
||||
type_match = true;
|
||||
best_cus = gpu->cus(i);
|
||||
best_device = i;
|
||||
@ -280,7 +280,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
||||
MPI_Comm_rank(_comm_gpu,&_gpu_rank);
|
||||
|
||||
#if !defined(CUDA_PROXY) && !defined(CUDA_MPS_SUPPORT)
|
||||
if (_procs_per_gpu>1 && gpu->sharing_supported(my_gpu)==false)
|
||||
if (_procs_per_gpu>1 && !gpu->sharing_supported(my_gpu))
|
||||
return -7;
|
||||
#endif
|
||||
|
||||
@ -400,7 +400,7 @@ int DeviceT::set_ocl_params(std::string s_config, const std::string &extra_args)
|
||||
_ocl_compile_string += " -DCONFIG_ID="+params[0]+
|
||||
" -DSIMD_SIZE="+params[1]+
|
||||
" -DMEM_THREADS="+params[2];
|
||||
if (gpu->has_shuffle_support()==false)
|
||||
if (!gpu->has_shuffle_support())
|
||||
_ocl_compile_string+=" -DSHUFFLE_AVAIL=0";
|
||||
else
|
||||
_ocl_compile_string+=" -DSHUFFLE_AVAIL="+params[3];
|
||||
@ -443,7 +443,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
const bool vel) {
|
||||
if (!_device_init)
|
||||
return -1;
|
||||
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
|
||||
if (sizeof(acctyp)==sizeof(double) && !gpu->double_precision())
|
||||
return -5;
|
||||
|
||||
// Counts of data transfers for timing overhead estimates
|
||||
@ -480,11 +480,11 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
if (vel)
|
||||
_data_in_estimate++;
|
||||
} else {
|
||||
if (atom.charge()==false && charge)
|
||||
if (!atom.charge() && charge)
|
||||
_data_in_estimate++;
|
||||
if (atom.quaternion()==false && rot)
|
||||
if (!atom.quaternion() && rot)
|
||||
_data_in_estimate++;
|
||||
if (atom.velocity()==false && vel)
|
||||
if (!atom.velocity() && vel)
|
||||
_data_in_estimate++;
|
||||
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel))
|
||||
return -3;
|
||||
@ -502,7 +502,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
|
||||
const int nall) {
|
||||
if (!_device_init)
|
||||
return -1;
|
||||
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
|
||||
if (sizeof(acctyp)==sizeof(double) && !gpu->double_precision())
|
||||
return -5;
|
||||
|
||||
if (_init_count==0) {
|
||||
|
||||
@ -113,7 +113,7 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
|
||||
if (!success)
|
||||
return false;
|
||||
|
||||
if (_use_packing==false) {
|
||||
if (!_use_packing) {
|
||||
#ifndef LAL_USE_OLD_NEIGHBOR
|
||||
_shared->compile_kernels(devi, gpu_nbor, compile_flags+
|
||||
" -DMAX_SUBGROUPS_PER_BLOCK="+toa(_block_nbor_build/_simd_size));
|
||||
@ -153,7 +153,7 @@ void Neighbor::alloc(bool &success) {
|
||||
int nt=_max_atoms+_max_host;
|
||||
if (_max_nbors)
|
||||
_max_nbors = ((_max_nbors-1)/_threads_per_atom+1)*_threads_per_atom;
|
||||
if (_use_packing==false || _gpu_nbor>0) {
|
||||
if (!_use_packing || _gpu_nbor>0) {
|
||||
if (_max_nbors)
|
||||
success=success &&
|
||||
(dev_nbor.alloc((_max_nbors+2)*_max_atoms,*dev)==UCL_SUCCESS);
|
||||
@ -166,7 +166,7 @@ void Neighbor::alloc(bool &success) {
|
||||
|
||||
_c_bytes=dev_nbor.row_bytes();
|
||||
if (_alloc_packed) {
|
||||
if (_use_packing==false) {
|
||||
if (!_use_packing) {
|
||||
dev_packed_begin.clear();
|
||||
success=success && (dev_packed_begin.alloc(_max_atoms,*dev,
|
||||
_packed_permissions)==UCL_SUCCESS);
|
||||
@ -373,7 +373,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
|
||||
|
||||
time_nbor.stop();
|
||||
|
||||
if (_use_packing==false) {
|
||||
if (!_use_packing) {
|
||||
time_kernel.start();
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(inum)*_threads_per_atom/
|
||||
block_size));
|
||||
@ -450,7 +450,7 @@ void Neighbor::get_host3(const int inum, const int nlist, int *ilist, int *numj,
|
||||
}
|
||||
time_nbor.stop();
|
||||
|
||||
if (_use_packing==false) {
|
||||
if (!_use_packing) {
|
||||
time_kernel.start();
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(inum)*_threads_per_atom/
|
||||
block_size));
|
||||
@ -564,7 +564,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
||||
#endif
|
||||
}
|
||||
|
||||
const numtyp cutoff_cast=static_cast<numtyp>(_cutoff);
|
||||
const auto cutoff_cast=static_cast<numtyp>(_cutoff);
|
||||
|
||||
if (_maxspecial>0) {
|
||||
time_nbor.start();
|
||||
@ -741,12 +741,12 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
||||
// If binning on GPU, do this now
|
||||
if (_gpu_nbor==1) {
|
||||
mn = _max_nbors;
|
||||
const numtyp i_cell_size=static_cast<numtyp>(1.0/_cell_size);
|
||||
const auto i_cell_size=static_cast<numtyp>(1.0/_cell_size);
|
||||
const int neigh_block=_block_cell_id;
|
||||
const int GX=(int)ceil((double)nall/neigh_block);
|
||||
const numtyp sublo0=static_cast<numtyp>(sublo[0]);
|
||||
const numtyp sublo1=static_cast<numtyp>(sublo[1]);
|
||||
const numtyp sublo2=static_cast<numtyp>(sublo[2]);
|
||||
const auto sublo0=static_cast<numtyp>(sublo[0]);
|
||||
const auto sublo1=static_cast<numtyp>(sublo[1]);
|
||||
const auto sublo2=static_cast<numtyp>(sublo[2]);
|
||||
_shared->k_cell_id.set_size(GX,neigh_block);
|
||||
_shared->k_cell_id.run(&atom.x, &atom.dev_cell_id,
|
||||
&atom.dev_particle_id, &sublo0, &sublo1,
|
||||
|
||||
@ -303,7 +303,7 @@ int PPPMT::spread(const int ago, const int nlocal, const int nall,
|
||||
double *host_q, double *boxlo,
|
||||
const double delxinv, const double delyinv,
|
||||
const double delzinv) {
|
||||
if (_precompute_done==false) {
|
||||
if (!_precompute_done) {
|
||||
atom->acc_timers();
|
||||
_precompute(ago,nlocal,nall,host_x,host_type,success,host_q,boxlo,delxinv,
|
||||
delyinv,delzinv);
|
||||
@ -359,7 +359,7 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
|
||||
time_interp.stop();
|
||||
|
||||
ans->copy_answers(false,false,false,false,0);
|
||||
if (_kspace_split==false)
|
||||
if (!_kspace_split)
|
||||
device->add_ans_object(ans);
|
||||
}
|
||||
|
||||
|
||||
@ -101,7 +101,7 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
|
||||
float *b=pppm_gpu_init(PPPMF,nlocal,nall,screen,order,nxlo_out,nylo_out,
|
||||
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
|
||||
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
|
||||
if (split==false && respa==false)
|
||||
if (!split && !respa)
|
||||
PPPMF.device->set_single_precompute(&PPPMF);
|
||||
return b;
|
||||
}
|
||||
@ -146,7 +146,7 @@ double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
|
||||
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
|
||||
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
||||
split,success);
|
||||
if (split==false && respa==false)
|
||||
if (!split && !respa)
|
||||
PPPMD.device->set_double_precompute(&PPPMD);
|
||||
return b;
|
||||
}
|
||||
|
||||
@ -58,49 +58,6 @@
|
||||
#define MAX_BIO_SHARED_TYPES 128
|
||||
#define PPPM_MAX_SPLINE 8
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// LEGACY DEVICE CONFIGURATION
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
|
||||
#if (__CUDA_ARCH__ < 200)
|
||||
|
||||
#undef CONFIG_ID
|
||||
#define CONFIG_ID 101
|
||||
#define MEM_THREADS 16
|
||||
#undef THREADS_PER_ATOM
|
||||
#define THREADS_PER_ATOM 1
|
||||
#undef THREADS_PER_CHARGE
|
||||
#define THREADS_PER_CHARGE 16
|
||||
#undef BLOCK_PAIR
|
||||
#define BLOCK_PAIR 64
|
||||
#undef BLOCK_BIO_PAIR
|
||||
#define BLOCK_BIO_PAIR 64
|
||||
#undef BLOCK_NBOR_BUILD
|
||||
#define BLOCK_NBOR_BUILD 64
|
||||
#undef MAX_SHARED_TYPES
|
||||
#define MAX_SHARED_TYPES 8
|
||||
#undef SHUFFLE_AVAIL
|
||||
#define SHUFFLE_AVAIL 0
|
||||
|
||||
#elseif (__CUDA_ARCH__ < 300)
|
||||
|
||||
#undef CONFIG_ID
|
||||
#define CONFIG_ID 102
|
||||
#undef BLOCK_PAIR
|
||||
#define BLOCK_PAIR 128
|
||||
#undef BLOCK_BIO_PAIR
|
||||
#define BLOCK_BIO_PAIR 128
|
||||
#undef MAX_SHARED_TYPES
|
||||
#define MAX_SHARED_TYPES 8
|
||||
#undef SHUFFLE_AVAIL
|
||||
#define SHUFFLE_AVAIL 0
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// KERNEL MACROS
|
||||
// -------------------------------------------------------------------------
|
||||
@ -111,12 +68,6 @@
|
||||
|
||||
#define fast_mul(X,Y) (X)*(Y)
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
#if (__CUDA_ARCH__ < 200)
|
||||
#define fast_mul __mul24
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define EVFLAG 1
|
||||
#define NOUNROLL
|
||||
#define GLOBAL_ID_X threadIdx.x+fast_mul(blockIdx.x,blockDim.x)
|
||||
@ -220,14 +171,6 @@
|
||||
// KERNEL MACROS - MATH
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
#ifdef CUDA_PRE_THREE
|
||||
struct __builtin_align__(16) _double4
|
||||
{
|
||||
double x, y, z, w;
|
||||
};
|
||||
typedef struct _double4 double4;
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
|
||||
#define ucl_exp exp
|
||||
|
||||
@ -69,7 +69,7 @@ int YukawaColloidT::init(const int ntypes,
|
||||
|
||||
_max_rad_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
|
||||
|
||||
if (_shared_view==false)
|
||||
if (!_shared_view)
|
||||
c_rad.alloc(_max_rad_size,*(this->ucl_device),UCL_WRITE_ONLY,UCL_READ_ONLY);
|
||||
|
||||
rad_tex.get_texture(*(this->pair_program),"rad_tex");
|
||||
@ -157,7 +157,7 @@ void YukawaColloidT::compute(const int f_ago, const int inum_full,
|
||||
|
||||
if (nall>_max_rad_size) {
|
||||
_max_rad_size=static_cast<int>(static_cast<double>(nall)*1.10);
|
||||
if (_shared_view==false) {
|
||||
if (!_shared_view) {
|
||||
c_rad.resize(_max_rad_size);
|
||||
rad_tex.bind_float(c_rad,1);
|
||||
}
|
||||
@ -229,7 +229,7 @@ int** YukawaColloidT::compute(const int ago, const int inum_full,
|
||||
|
||||
if (nall>_max_rad_size) {
|
||||
_max_rad_size=static_cast<int>(static_cast<double>(nall)*1.10);
|
||||
if (_shared_view==false) {
|
||||
if (!_shared_view) {
|
||||
c_rad.resize(_max_rad_size);
|
||||
rad_tex.bind_float(c_rad,1);
|
||||
}
|
||||
|
||||
@ -34,12 +34,12 @@ make lib-meam args="-m ifort" # build MEAM lib with custom Makefile.ifort (usi
|
||||
|
||||
# settings
|
||||
|
||||
version = "1.3.0"
|
||||
version = "1.3.2"
|
||||
url = "https://github.com/MolSSI-MDI/MDI_Library/archive/v%s.tar.gz" % version
|
||||
|
||||
# known checksums for different MDI versions. used to validate the download.
|
||||
checksums = { \
|
||||
'1.3.0' : '8a8da217148bd9b700083b67d795af5e', \
|
||||
'1.3.2' : '836f5da400d8cff0f0e4435640f9454f', \
|
||||
}
|
||||
|
||||
# print error message or help
|
||||
|
||||
Reference in New Issue
Block a user