Ran the four make commands in the src folder: make fix-whitespace; make fix-homepage; make fix-errordocs; make fix-permissions
This commit is contained in:
@ -285,7 +285,7 @@ class Atom {
|
|||||||
/// Signal that we need to transfer atom data for next timestep
|
/// Signal that we need to transfer atom data for next timestep
|
||||||
inline void data_unavail()
|
inline void data_unavail()
|
||||||
{ _x_avail=false; _q_avail=false; _quat_avail=false; _v_avail=false; _extra_avail=false; _resized=false; }
|
{ _x_avail=false; _q_avail=false; _quat_avail=false; _v_avail=false; _extra_avail=false; _resized=false; }
|
||||||
|
|
||||||
/// Signal that we need to transfer atom extra data for next kernel call
|
/// Signal that we need to transfer atom extra data for next kernel call
|
||||||
inline void extra_data_unavail()
|
inline void extra_data_unavail()
|
||||||
{ _extra_avail=false; }
|
{ _extra_avail=false; }
|
||||||
|
|||||||
@ -42,7 +42,7 @@ BaseAmoebaT::~BaseAmoeba() {
|
|||||||
k_polar.clear();
|
k_polar.clear();
|
||||||
k_special15.clear();
|
k_special15.clear();
|
||||||
k_short_nbor.clear();
|
k_short_nbor.clear();
|
||||||
|
|
||||||
#if 0 // !defined(USE_OPENCL) && !defined(USE_HIP)
|
#if 0 // !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||||
if (fft_plan_created) cufftDestroy(plan);
|
if (fft_plan_created) cufftDestroy(plan);
|
||||||
#endif
|
#endif
|
||||||
@ -365,7 +365,7 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Compute multipole real-space part
|
// Compute multipole real-space part
|
||||||
// precompute() should be already invoked before mem (re)allocation
|
// precompute() should be already invoked before mem (re)allocation
|
||||||
// this is the first part in a time step done on the GPU for AMOEBA for now
|
// this is the first part in a time step done on the GPU for AMOEBA for now
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
||||||
@ -418,7 +418,7 @@ void BaseAmoebaT::compute_udirect2b(int *host_amtype, int *host_amgroup, double
|
|||||||
|
|
||||||
cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
||||||
atom->add_extra_data();
|
atom->add_extra_data();
|
||||||
|
|
||||||
*fieldp_ptr=_fieldp.host.begin();
|
*fieldp_ptr=_fieldp.host.begin();
|
||||||
|
|
||||||
// specify the correct cutoff and alpha values
|
// specify the correct cutoff and alpha values
|
||||||
@ -443,7 +443,7 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double
|
|||||||
// only copy the necessary data arrays that are updated over the iterations
|
// only copy the necessary data arrays that are updated over the iterations
|
||||||
// use nullptr for the other arrays that are already copied from host to device
|
// use nullptr for the other arrays that are already copied from host to device
|
||||||
cast_extra_data(host_amtype, host_amgroup, nullptr, host_uind, host_uinp, nullptr);
|
cast_extra_data(host_amtype, host_amgroup, nullptr, host_uind, host_uinp, nullptr);
|
||||||
atom->add_extra_data();
|
atom->add_extra_data();
|
||||||
|
|
||||||
// set the correct cutoff and alpha
|
// set the correct cutoff and alpha
|
||||||
_off2_polar = off2_polar;
|
_off2_polar = off2_polar;
|
||||||
@ -648,7 +648,7 @@ int BaseAmoebaT::fphi_uind() {
|
|||||||
int ngridxy = _ngridx * _ngridy;
|
int ngridxy = _ngridx * _ngridy;
|
||||||
k_fphi_uind.set_size(GX,BX);
|
k_fphi_uind.set_size(GX,BX);
|
||||||
k_fphi_uind.run(&_thetai1, &_thetai2, &_thetai3, &_igrid, &_cgrid_brick,
|
k_fphi_uind.run(&_thetai1, &_thetai2, &_thetai3, &_igrid, &_cgrid_brick,
|
||||||
&_fdip_phi1, &_fdip_phi2, &_fdip_sum_phi, &_bsorder, &ainum,
|
&_fdip_phi1, &_fdip_phi2, &_fdip_sum_phi, &_bsorder, &ainum,
|
||||||
&_nzlo_out, &_nylo_out, &_nxlo_out, &ngridxy, &_ngridx);
|
&_nzlo_out, &_nylo_out, &_nxlo_out, &ngridxy, &_ngridx);
|
||||||
time_pair.stop();
|
time_pair.stop();
|
||||||
|
|
||||||
@ -738,7 +738,7 @@ void BaseAmoebaT::compute_polar_real(int *host_amtype, int *host_amgroup,
|
|||||||
// cast necessary data arrays from host to device
|
// cast necessary data arrays from host to device
|
||||||
|
|
||||||
cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
||||||
atom->add_extra_data();
|
atom->add_extra_data();
|
||||||
|
|
||||||
*tep_ptr=_tep.host.begin();
|
*tep_ptr=_tep.host.begin();
|
||||||
|
|
||||||
@ -784,7 +784,7 @@ template <class numtyp, class acctyp>
|
|||||||
void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int mode)
|
void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int mode)
|
||||||
{
|
{
|
||||||
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
||||||
#if 0 // !defined(USE_OPENCL) && !defined(USE_HIP)
|
#if 0 // !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||||
if (fft_plan_created == false) {
|
if (fft_plan_created == false) {
|
||||||
int m = numel/2;
|
int m = numel/2;
|
||||||
cufftPlan1d(&plan, m, CUFFT_Z2Z, 1);
|
cufftPlan1d(&plan, m, CUFFT_Z2Z, 1);
|
||||||
@ -793,7 +793,7 @@ void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int
|
|||||||
|
|
||||||
// n = number of double complex
|
// n = number of double complex
|
||||||
int n = numel/2;
|
int n = numel/2;
|
||||||
|
|
||||||
// copy the host array to the device (data)
|
// copy the host array to the device (data)
|
||||||
UCL_Vector<cufftDoubleComplex,cufftDoubleComplex> data;
|
UCL_Vector<cufftDoubleComplex,cufftDoubleComplex> data;
|
||||||
data.alloc(n, *(this->ucl_device), UCL_READ_WRITE, UCL_READ_WRITE);
|
data.alloc(n, *(this->ucl_device), UCL_READ_WRITE, UCL_READ_WRITE);
|
||||||
@ -807,7 +807,7 @@ void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int
|
|||||||
data.update_device(false);
|
data.update_device(false);
|
||||||
|
|
||||||
// perform the in-place forward FFT
|
// perform the in-place forward FFT
|
||||||
|
|
||||||
cufftResult result = cufftExecZ2Z(plan, (cufftDoubleComplex*)&data.device,
|
cufftResult result = cufftExecZ2Z(plan, (cufftDoubleComplex*)&data.device,
|
||||||
(cufftDoubleComplex*)&data.device, CUFFT_FORWARD);
|
(cufftDoubleComplex*)&data.device, CUFFT_FORWARD);
|
||||||
if (result != CUFFT_SUCCESS) printf("failed cufft %d\n", result);
|
if (result != CUFFT_SUCCESS) printf("failed cufft %d\n", result);
|
||||||
|
|||||||
@ -256,7 +256,7 @@ class BaseAmoeba {
|
|||||||
int _ngridx, _ngridy, _ngridz, _num_grid_points;
|
int _ngridx, _ngridy, _ngridz, _num_grid_points;
|
||||||
|
|
||||||
int _end_command_queue;
|
int _end_command_queue;
|
||||||
|
|
||||||
// ------------------------ FORCE/ENERGY DATA -----------------------
|
// ------------------------ FORCE/ENERGY DATA -----------------------
|
||||||
|
|
||||||
Answer<numtyp,acctyp> *ans;
|
Answer<numtyp,acctyp> *ans;
|
||||||
@ -312,7 +312,7 @@ class BaseAmoeba {
|
|||||||
virtual int fphi_uind();
|
virtual int fphi_uind();
|
||||||
virtual int fphi_mpole();
|
virtual int fphi_mpole();
|
||||||
virtual int polar_real(const int eflag, const int vflag) = 0;
|
virtual int polar_real(const int eflag, const int vflag) = 0;
|
||||||
|
|
||||||
|
|
||||||
#if !defined(USE_OPENCL) && !defined(USE_HIP)
|
#if !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||||
cufftHandle plan;
|
cufftHandle plan;
|
||||||
|
|||||||
@ -597,11 +597,11 @@ int HippoT::polar_real(const int eflag, const int vflag) {
|
|||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
|
|
||||||
const int BX=this->block_size();
|
const int BX=this->block_size();
|
||||||
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
/*
|
/*
|
||||||
const int cus = this->device->gpu->cus();
|
const int cus = this->device->gpu->cus();
|
||||||
while (GX < cus && GX > 1) {
|
while (GX < cus && GX > 1) {
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
|
|||||||
@ -124,7 +124,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
UCL_D_Vec<numtyp4> coeff_amtype;
|
UCL_D_Vec<numtyp4> coeff_amtype;
|
||||||
/// csix = coeff_amclass.x; adisp = coeff_amclass.y;
|
/// csix = coeff_amclass.x; adisp = coeff_amclass.y;
|
||||||
UCL_D_Vec<numtyp4> coeff_amclass;
|
UCL_D_Vec<numtyp4> coeff_amclass;
|
||||||
/// sizpr = coeff_rep.x; dmppr = coeff_rep.y; elepr = coeff_rep.z;
|
/// sizpr = coeff_rep.x; dmppr = coeff_rep.y; elepr = coeff_rep.z;
|
||||||
UCL_D_Vec<numtyp4> coeff_rep;
|
UCL_D_Vec<numtyp4> coeff_rep;
|
||||||
/// Special polar values [0-4]:
|
/// Special polar values [0-4]:
|
||||||
/// sp_polar.x = special_polar_wscale
|
/// sp_polar.x = special_polar_wscale
|
||||||
|
|||||||
@ -173,7 +173,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
|||||||
dmpik[4] = pre * (s*d2s + ds*ds);
|
dmpik[4] = pre * (s*d2s + ds*ds);
|
||||||
dmpik[6] = pre * (s*d3s + (numtyp)3.0*ds*d2s);
|
dmpik[6] = pre * (s*d3s + (numtyp)3.0*ds*d2s);
|
||||||
dmpik[8] = pre * (s*d4s + (numtyp)4.0*ds*d3s + (numtyp)3.0*d2s*d2s);
|
dmpik[8] = pre * (s*d4s + (numtyp)4.0*ds*d3s + (numtyp)3.0*d2s*d2s);
|
||||||
|
|
||||||
if (rorder >= 11) dmpik[10] = pre * (s*d5s + (numtyp)5.0*ds*d4s + (numtyp)10.0*d2s*d3s);
|
if (rorder >= 11) dmpik[10] = pre * (s*d5s + (numtyp)5.0*ds*d4s + (numtyp)10.0*d2s*d3s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -684,7 +684,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
|||||||
if (_cutoff < _cell_size) vadjust*=1.46;
|
if (_cutoff < _cell_size) vadjust*=1.46;
|
||||||
mn=std::max(mn,static_cast<int>(ceil(_max_neighbor_factor*vadjust*mn)));
|
mn=std::max(mn,static_cast<int>(ceil(_max_neighbor_factor*vadjust*mn)));
|
||||||
if (mn<33) mn+=3;
|
if (mn<33) mn+=3;
|
||||||
|
|
||||||
resize_max_neighbors<numtyp,acctyp>(mn,success);
|
resize_max_neighbors<numtyp,acctyp>(mn,success);
|
||||||
set_nbor_block_size(mn/2);
|
set_nbor_block_size(mn/2);
|
||||||
if (!success)
|
if (!success)
|
||||||
@ -837,7 +837,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
|||||||
time_nbor.stop();
|
time_nbor.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Neighbor::transpose(UCL_D_Vec<tagint> &out, const UCL_D_Vec<tagint> &in,
|
void Neighbor::transpose(UCL_D_Vec<tagint> &out, const UCL_D_Vec<tagint> &in,
|
||||||
const int columns_in, const int rows_in)
|
const int columns_in, const int rows_in)
|
||||||
{
|
{
|
||||||
const int b2x=_block_cell_2d;
|
const int b2x=_block_cell_2d;
|
||||||
|
|||||||
@ -260,7 +260,7 @@ class Neighbor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Helper function
|
/// Helper function
|
||||||
void transpose(UCL_D_Vec<tagint> &out, const UCL_D_Vec<tagint> &in,
|
void transpose(UCL_D_Vec<tagint> &out, const UCL_D_Vec<tagint> &in,
|
||||||
const int columns_in, const int rows_in);
|
const int columns_in, const int rows_in);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -73,7 +73,7 @@ void PairAmoeba::moduli()
|
|||||||
_nfft_max = maxfft;
|
_nfft_max = maxfft;
|
||||||
memory->create(_moduli_bsarray,_nfft_max,"amoeba:_moduli_bsarray");
|
memory->create(_moduli_bsarray,_nfft_max,"amoeba:_moduli_bsarray");
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute and load the moduli values
|
// compute and load the moduli values
|
||||||
|
|
||||||
double x = 0.0;
|
double x = 0.0;
|
||||||
|
|||||||
@ -419,7 +419,7 @@ void PairAmoeba::multipole_real()
|
|||||||
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
|
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
|
||||||
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
|
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// find damped multipole intermediates for force and torque
|
// find damped multipole intermediates for force and torque
|
||||||
|
|
||||||
@ -465,7 +465,7 @@ void PairAmoeba::multipole_real()
|
|||||||
term4 = 2.0 * (-ck*rr5+dkr*rr7-qkr*rr9);
|
term4 = 2.0 * (-ck*rr5+dkr*rr7-qkr*rr9);
|
||||||
term5 = 2.0 * (-ci*rr5-dir*rr7-qir*rr9);
|
term5 = 2.0 * (-ci*rr5-dir*rr7-qir*rr9);
|
||||||
term6 = 4.0 * rr7;
|
term6 = 4.0 * rr7;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
empole += e;
|
empole += e;
|
||||||
|
|||||||
@ -570,7 +570,7 @@ void PairAmoeba::finish()
|
|||||||
double time_mutual_fft = ic_kspace->time_fft;
|
double time_mutual_fft = ic_kspace->time_fft;
|
||||||
MPI_Allreduce(&time_mutual_fft,&ave,1,MPI_DOUBLE,MPI_SUM,world);
|
MPI_Allreduce(&time_mutual_fft,&ave,1,MPI_DOUBLE,MPI_SUM,world);
|
||||||
time_mutual_fft = ave/comm->nprocs;
|
time_mutual_fft = ave/comm->nprocs;
|
||||||
|
|
||||||
double time_total = (time_init + time_hal + time_repulse + time_disp +
|
double time_total = (time_init + time_hal + time_repulse + time_disp +
|
||||||
time_mpole + time_induce + time_polar + time_qxfer) / 100.0;
|
time_mpole + time_induce + time_polar + time_qxfer) / 100.0;
|
||||||
|
|
||||||
@ -597,7 +597,7 @@ void PairAmoeba::finish()
|
|||||||
utils::logmesg(lmp," Mpole time: {:.6g} {:.3g}%\n", time_mpole_rspace, time_mpole_rspace/time_total);
|
utils::logmesg(lmp," Mpole time: {:.6g} {:.3g}%\n", time_mpole_rspace, time_mpole_rspace/time_total);
|
||||||
utils::logmesg(lmp," Direct time: {:.6g} {:.3g}%\n", time_direct_rspace, time_direct_rspace/time_total);
|
utils::logmesg(lmp," Direct time: {:.6g} {:.3g}%\n", time_direct_rspace, time_direct_rspace/time_total);
|
||||||
utils::logmesg(lmp," Mutual time: {:.6g} {:.3g}%\n", time_mutual_rspace, time_mutual_rspace/time_total);
|
utils::logmesg(lmp," Mutual time: {:.6g} {:.3g}%\n", time_mutual_rspace, time_mutual_rspace/time_total);
|
||||||
utils::logmesg(lmp," Polar time: {:.6g} {:.3g}%\n", time_polar_rspace, time_polar_rspace/time_total);
|
utils::logmesg(lmp," Polar time: {:.6g} {:.3g}%\n", time_polar_rspace, time_polar_rspace/time_total);
|
||||||
utils::logmesg(lmp," K-space timing breakdown : {:.3g}%\n", kspace_time/time_total);
|
utils::logmesg(lmp," K-space timing breakdown : {:.3g}%\n", kspace_time/time_total);
|
||||||
utils::logmesg(lmp," Mpole time: {:.6g} {:.3g}%\n", time_mpole_kspace, time_mpole_kspace/time_total);
|
utils::logmesg(lmp," Mpole time: {:.6g} {:.3g}%\n", time_mpole_kspace, time_mpole_kspace/time_total);
|
||||||
utils::logmesg(lmp," Direct time: {:.6g} {:.3g}%\n", time_direct_kspace, time_direct_kspace/time_total);
|
utils::logmesg(lmp," Direct time: {:.6g} {:.3g}%\n", time_direct_kspace, time_direct_kspace/time_total);
|
||||||
@ -606,7 +606,7 @@ void PairAmoeba::finish()
|
|||||||
utils::logmesg(lmp," - FFT : {:.6g} {:.3g}%\n", time_mutual_fft, time_mutual_fft/time_total);
|
utils::logmesg(lmp," - FFT : {:.6g} {:.3g}%\n", time_mutual_fft, time_mutual_fft/time_total);
|
||||||
utils::logmesg(lmp," - Interp : {:.6g} {:.3g}%\n", time_fphi_uind, time_fphi_uind/time_total);
|
utils::logmesg(lmp," - Interp : {:.6g} {:.3g}%\n", time_fphi_uind, time_fphi_uind/time_total);
|
||||||
utils::logmesg(lmp," Polar time: {:.6g} {:.3g}%\n", time_polar_kspace, time_polar_kspace/time_total);
|
utils::logmesg(lmp," Polar time: {:.6g} {:.3g}%\n", time_polar_kspace, time_polar_kspace/time_total);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -347,8 +347,8 @@ class PairAmoeba : public Pair {
|
|||||||
|
|
||||||
class AmoebaConvolution *m_kspace; // multipole KSpace
|
class AmoebaConvolution *m_kspace; // multipole KSpace
|
||||||
class AmoebaConvolution *p_kspace; // polar KSpace
|
class AmoebaConvolution *p_kspace; // polar KSpace
|
||||||
class AmoebaConvolution *pc_kspace;
|
class AmoebaConvolution *pc_kspace;
|
||||||
class AmoebaConvolution *d_kspace; // dispersion KSpace
|
class AmoebaConvolution *d_kspace; // dispersion KSpace
|
||||||
class AmoebaConvolution *i_kspace; // induce KSpace
|
class AmoebaConvolution *i_kspace; // induce KSpace
|
||||||
class AmoebaConvolution *ic_kspace;
|
class AmoebaConvolution *ic_kspace;
|
||||||
|
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/ Sandia National Laboratories
|
https://www.lammps.org/ Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
LAMMPS Development team: developers@lammps.org
|
||||||
|
|
||||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
/* -*- c++ -*- ----------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/ Sandia National Laboratories
|
https://www.lammps.org/ Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
LAMMPS Development team: developers@lammps.org
|
||||||
|
|
||||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/, Sandia National Laboratories
|
https://www.lammps.org/, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
LAMMPS Development team: developers@lammps.org
|
||||||
|
|
||||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
@ -283,7 +283,7 @@ void PairAmoebaGPU::multipole_real()
|
|||||||
success, aewald, felec, off2, atom->q,
|
success, aewald, felec, off2, atom->q,
|
||||||
domain->boxlo, domain->prd, &tq_pinned);
|
domain->boxlo, domain->prd, &tq_pinned);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// reference to the tep array from GPU lib
|
// reference to the tep array from GPU lib
|
||||||
|
|
||||||
@ -400,7 +400,7 @@ void PairAmoebaGPU::induce()
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < nlocal; i++) {
|
for (i = 0; i < nlocal; i++) {
|
||||||
itype = amtype[i];
|
itype = amtype[i];
|
||||||
for (j = 0; j < 3; j++) {
|
for (j = 0; j < 3; j++) {
|
||||||
uopt[i][m][j] = polarity[itype] * field[i][j];
|
uopt[i][m][j] = polarity[itype] * field[i][j];
|
||||||
uoptp[i][m][j] = polarity[itype] * fieldp[i][j];
|
uoptp[i][m][j] = polarity[itype] * fieldp[i][j];
|
||||||
@ -666,7 +666,7 @@ void PairAmoebaGPU::induce()
|
|||||||
|
|
||||||
if (iter >= maxiter || eps > epsold)
|
if (iter >= maxiter || eps > epsold)
|
||||||
if (comm->me == 0)
|
if (comm->me == 0)
|
||||||
error->warning(FLERR,"AMOEBA induced dipoles did not converge");
|
error->warning(FLERR,"AMOEBA induced dipoles did not converge");
|
||||||
}
|
}
|
||||||
|
|
||||||
// update the lists of previous induced dipole values
|
// update the lists of previous induced dipole values
|
||||||
@ -958,7 +958,7 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
|
|||||||
// field and fieldp may already have some nonzero values from kspace (umutual1 and self)
|
// field and fieldp may already have some nonzero values from kspace (umutual1 and self)
|
||||||
|
|
||||||
amoeba_gpu_update_fieldp(&fieldp_pinned);
|
amoeba_gpu_update_fieldp(&fieldp_pinned);
|
||||||
|
|
||||||
int inum = atom->nlocal;
|
int inum = atom->nlocal;
|
||||||
double *field_ptr = (double *)fieldp_pinned;
|
double *field_ptr = (double *)fieldp_pinned;
|
||||||
|
|
||||||
@ -1015,8 +1015,8 @@ void PairAmoebaGPU::umutual1(double **field, double **fieldp)
|
|||||||
fuind[i][1] = a[1][0]*uind[i][0] + a[1][1]*uind[i][1] + a[1][2]*uind[i][2];
|
fuind[i][1] = a[1][0]*uind[i][0] + a[1][1]*uind[i][1] + a[1][2]*uind[i][2];
|
||||||
fuind[i][2] = a[2][0]*uind[i][0] + a[2][1]*uind[i][1] + a[2][2]*uind[i][2];
|
fuind[i][2] = a[2][0]*uind[i][0] + a[2][1]*uind[i][1] + a[2][2]*uind[i][2];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
fuinp[i][0] = a[0][0]*uinp[i][0] + a[0][1]*uinp[i][1] + a[0][2]*uinp[i][2];
|
fuinp[i][0] = a[0][0]*uinp[i][0] + a[0][1]*uinp[i][1] + a[0][2]*uinp[i][2];
|
||||||
fuinp[i][1] = a[1][0]*uinp[i][0] + a[1][1]*uinp[i][1] + a[1][2]*uinp[i][2];
|
fuinp[i][1] = a[1][0]*uinp[i][0] + a[1][1]*uinp[i][1] + a[1][2]*uinp[i][2];
|
||||||
fuinp[i][2] = a[2][0]*uinp[i][0] + a[2][1]*uinp[i][1] + a[2][2]*uinp[i][2];
|
fuinp[i][2] = a[2][0]*uinp[i][0] + a[2][1]*uinp[i][1] + a[2][2]*uinp[i][2];
|
||||||
@ -1037,7 +1037,7 @@ void PairAmoebaGPU::umutual1(double **field, double **fieldp)
|
|||||||
|
|
||||||
time1 = MPI_Wtime();
|
time1 = MPI_Wtime();
|
||||||
time_grid_uind += (time1 - time0);
|
time_grid_uind += (time1 - time0);
|
||||||
|
|
||||||
// pre-convolution operations including forward FFT
|
// pre-convolution operations including forward FFT
|
||||||
// gridfft = my portion of complex 3d grid in FFT decomposition
|
// gridfft = my portion of complex 3d grid in FFT decomposition
|
||||||
|
|
||||||
@ -1137,7 +1137,7 @@ void PairAmoebaGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
|||||||
void* fdip_sum_phi_pinned = nullptr;
|
void* fdip_sum_phi_pinned = nullptr;
|
||||||
amoeba_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
|
amoeba_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
|
||||||
&fdip_sum_phi_pinned);
|
&fdip_sum_phi_pinned);
|
||||||
|
|
||||||
int nlocal = atom->nlocal;
|
int nlocal = atom->nlocal;
|
||||||
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;
|
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
@ -1356,7 +1356,7 @@ void PairAmoebaGPU::polar_kspace()
|
|||||||
bspline_fill();
|
bspline_fill();
|
||||||
|
|
||||||
// allocate memory and make early host-device transfers
|
// allocate memory and make early host-device transfers
|
||||||
|
|
||||||
// NOTE: this is for p_kspace, and igrid and thetai[1-3] are filled by bpsline_fill
|
// NOTE: this is for p_kspace, and igrid and thetai[1-3] are filled by bpsline_fill
|
||||||
if (gpu_fphi_mpole_ready) {
|
if (gpu_fphi_mpole_ready) {
|
||||||
amoeba_gpu_precompute_kspace(atom->nlocal, bsorder,
|
amoeba_gpu_precompute_kspace(atom->nlocal, bsorder,
|
||||||
@ -1365,7 +1365,7 @@ void PairAmoebaGPU::polar_kspace()
|
|||||||
p_kspace->nylo_out, p_kspace->nyhi_out,
|
p_kspace->nylo_out, p_kspace->nyhi_out,
|
||||||
p_kspace->nxlo_out, p_kspace->nxhi_out);
|
p_kspace->nxlo_out, p_kspace->nxhi_out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// convert Cartesian multipoles to fractional coordinates
|
// convert Cartesian multipoles to fractional coordinates
|
||||||
|
|
||||||
@ -1435,7 +1435,7 @@ void PairAmoebaGPU::polar_kspace()
|
|||||||
double ***gridpost = (double ***) p_kspace->post_convolution();
|
double ***gridpost = (double ***) p_kspace->post_convolution();
|
||||||
|
|
||||||
// get potential
|
// get potential
|
||||||
|
|
||||||
if (!gpu_fphi_mpole_ready) {
|
if (!gpu_fphi_mpole_ready) {
|
||||||
fphi_mpole(gridpost,fphi);
|
fphi_mpole(gridpost,fphi);
|
||||||
|
|
||||||
@ -1447,7 +1447,7 @@ void PairAmoebaGPU::polar_kspace()
|
|||||||
} else {
|
} else {
|
||||||
void* fphi_pinned = nullptr;
|
void* fphi_pinned = nullptr;
|
||||||
amoeba_gpu_fphi_mpole(gridpost, &fphi_pinned, felec);
|
amoeba_gpu_fphi_mpole(gridpost, &fphi_pinned, felec);
|
||||||
|
|
||||||
double *_fphi_ptr = (double *)fphi_pinned;
|
double *_fphi_ptr = (double *)fphi_pinned;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = i;
|
int idx = i;
|
||||||
@ -1457,7 +1457,7 @@ void PairAmoebaGPU::polar_kspace()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert field from fractional to Cartesian
|
// convert field from fractional to Cartesian
|
||||||
|
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
/* -*- c++ -*- ----------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/, Sandia National Laboratories
|
https://www.lammps.org/, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
LAMMPS Development team: developers@lammps.org
|
||||||
|
|
||||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
@ -71,16 +71,3 @@ class PairAmoebaGPU : public PairAmoeba {
|
|||||||
} // namespace LAMMPS_NS
|
} // namespace LAMMPS_NS
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* ERROR/WARNING messages:
|
|
||||||
|
|
||||||
E: Insufficient memory on accelerator
|
|
||||||
|
|
||||||
There is insufficient memory on one of the devices specified for the gpu
|
|
||||||
package
|
|
||||||
|
|
||||||
E: Pair style amoeba/gpu requires atom attribute q
|
|
||||||
|
|
||||||
The atom style defined does not have this attribute.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/, Sandia National Laboratories
|
https://www.lammps.org/, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
LAMMPS Development team: developers@lammps.org
|
||||||
|
|
||||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
@ -517,7 +517,7 @@ void PairHippoGPU::induce()
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < nlocal; i++) {
|
for (i = 0; i < nlocal; i++) {
|
||||||
itype = amtype[i];
|
itype = amtype[i];
|
||||||
for (j = 0; j < 3; j++) {
|
for (j = 0; j < 3; j++) {
|
||||||
uopt[i][m][j] = polarity[itype] * field[i][j];
|
uopt[i][m][j] = polarity[itype] * field[i][j];
|
||||||
uoptp[i][m][j] = polarity[itype] * fieldp[i][j];
|
uoptp[i][m][j] = polarity[itype] * fieldp[i][j];
|
||||||
@ -785,7 +785,7 @@ void PairHippoGPU::induce()
|
|||||||
|
|
||||||
if (iter >= maxiter || eps > epsold)
|
if (iter >= maxiter || eps > epsold)
|
||||||
if (comm->me == 0)
|
if (comm->me == 0)
|
||||||
error->warning(FLERR,"HIPPO induced dipoles did not converge");
|
error->warning(FLERR,"HIPPO induced dipoles did not converge");
|
||||||
}
|
}
|
||||||
|
|
||||||
// update the lists of previous induced dipole values
|
// update the lists of previous induced dipole values
|
||||||
@ -1045,7 +1045,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
|
|||||||
|
|
||||||
memset(&field[0][0], 0, 3*nall *sizeof(double));
|
memset(&field[0][0], 0, 3*nall *sizeof(double));
|
||||||
memset(&fieldp[0][0], 0, 3*nall *sizeof(double));
|
memset(&fieldp[0][0], 0, 3*nall *sizeof(double));
|
||||||
|
|
||||||
// get the real space portion of the mutual field first
|
// get the real space portion of the mutual field first
|
||||||
|
|
||||||
MPI_Barrier(world);
|
MPI_Barrier(world);
|
||||||
@ -1078,7 +1078,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
|
|||||||
// field and fieldp may already have some nonzero values from kspace (umutual1 and self)
|
// field and fieldp may already have some nonzero values from kspace (umutual1 and self)
|
||||||
|
|
||||||
hippo_gpu_update_fieldp(&fieldp_pinned);
|
hippo_gpu_update_fieldp(&fieldp_pinned);
|
||||||
|
|
||||||
int inum = atom->nlocal;
|
int inum = atom->nlocal;
|
||||||
double *field_ptr = (double *)fieldp_pinned;
|
double *field_ptr = (double *)fieldp_pinned;
|
||||||
|
|
||||||
@ -1136,8 +1136,8 @@ void PairHippoGPU::umutual1(double **field, double **fieldp)
|
|||||||
fuind[i][1] = a[1][0]*uind[i][0] + a[1][1]*uind[i][1] + a[1][2]*uind[i][2];
|
fuind[i][1] = a[1][0]*uind[i][0] + a[1][1]*uind[i][1] + a[1][2]*uind[i][2];
|
||||||
fuind[i][2] = a[2][0]*uind[i][0] + a[2][1]*uind[i][1] + a[2][2]*uind[i][2];
|
fuind[i][2] = a[2][0]*uind[i][0] + a[2][1]*uind[i][1] + a[2][2]*uind[i][2];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
fuinp[i][0] = a[0][0]*uinp[i][0] + a[0][1]*uinp[i][1] + a[0][2]*uinp[i][2];
|
fuinp[i][0] = a[0][0]*uinp[i][0] + a[0][1]*uinp[i][1] + a[0][2]*uinp[i][2];
|
||||||
fuinp[i][1] = a[1][0]*uinp[i][0] + a[1][1]*uinp[i][1] + a[1][2]*uinp[i][2];
|
fuinp[i][1] = a[1][0]*uinp[i][0] + a[1][1]*uinp[i][1] + a[1][2]*uinp[i][2];
|
||||||
fuinp[i][2] = a[2][0]*uinp[i][0] + a[2][1]*uinp[i][1] + a[2][2]*uinp[i][2];
|
fuinp[i][2] = a[2][0]*uinp[i][0] + a[2][1]*uinp[i][1] + a[2][2]*uinp[i][2];
|
||||||
@ -1266,7 +1266,7 @@ void PairHippoGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
|||||||
void* fdip_sum_phi_pinned = nullptr;
|
void* fdip_sum_phi_pinned = nullptr;
|
||||||
hippo_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
|
hippo_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
|
||||||
&fdip_sum_phi_pinned);
|
&fdip_sum_phi_pinned);
|
||||||
|
|
||||||
int nlocal = atom->nlocal;
|
int nlocal = atom->nlocal;
|
||||||
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;
|
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
/* -*- c++ -*- ----------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
https://www.lammps.org/, Sandia National Laboratories
|
https://www.lammps.org/, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
LAMMPS Development team: developers@lammps.org
|
||||||
|
|
||||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
@ -71,16 +71,3 @@ class PairHippoGPU : public PairAmoeba {
|
|||||||
} // namespace LAMMPS_NS
|
} // namespace LAMMPS_NS
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* ERROR/WARNING messages:
|
|
||||||
|
|
||||||
E: Insufficient memory on accelerator
|
|
||||||
|
|
||||||
There is insufficient memory on one of the devices specified for the gpu
|
|
||||||
package
|
|
||||||
|
|
||||||
E: Pair style hippo/gpu requires atom attribute q
|
|
||||||
|
|
||||||
The atom style defined does not have this attribute.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|||||||
Reference in New Issue
Block a user