Unified zlist and blist CPU and GPU structures; greatly simplified/fused compute_bi
This commit is contained in:
@ -40,7 +40,6 @@ struct TagPairSNAPBeta{};
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSNAPComputeForce{};
|
||||
|
||||
|
||||
// GPU backend only
|
||||
struct TagPairSNAPComputeNeigh{};
|
||||
struct TagPairSNAPComputeCayleyKlein{};
|
||||
@ -50,7 +49,6 @@ struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
|
||||
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
|
||||
struct TagPairSNAPComputeZi{};
|
||||
struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPTransformBi{}; // re-order blist from AoSoA to AoS
|
||||
struct TagPairSNAPComputeYi{};
|
||||
struct TagPairSNAPComputeYiWithZlist{};
|
||||
template<int dir>
|
||||
@ -187,9 +185,6 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBi,const int iatom_mod, const int idxb, const int iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi,const int iatom_mod, const int idxz, const int iatom_div) const;
|
||||
|
||||
@ -221,7 +216,7 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
void operator() (TagPairSNAPComputeZiCPU,const int& ii) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBiCPU>::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeBiCPU, const int& ii) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYiCPU,const int& ii) const;
|
||||
|
||||
@ -264,9 +264,8 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in,
|
||||
Kokkos::parallel_for("ComputeZiCPU",policy_zi_cpu,*this);
|
||||
|
||||
//ComputeBi
|
||||
int team_size = team_size_default;
|
||||
check_team_size_for<TagPairSNAPComputeBiCPU>(chunk_size,team_size);
|
||||
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU> policy_bi_cpu(chunk_size,team_size,vector_length);
|
||||
int idxb_max = snaKK.idxb_max;
|
||||
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeBiCPU> policy_bi_cpu(0, chunk_size * idxb_max);
|
||||
Kokkos::parallel_for("ComputeBiCPU",policy_bi_cpu,*this);
|
||||
}
|
||||
|
||||
@ -381,13 +380,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in,
|
||||
Snap3DRangePolicy<DeviceType, tile_size_compute_bi, TagPairSNAPComputeBi>
|
||||
policy_compute_bi({0,0,0},{vector_length,idxb_max,chunk_size_div},{vector_length,tile_size_compute_bi,1});
|
||||
Kokkos::parallel_for("ComputeBi",policy_compute_bi,*this);
|
||||
|
||||
//Transform data layout of blist out of AoSoA
|
||||
//We need this because `blist` gets used in ComputeForce which doesn't
|
||||
//take advantage of AoSoA, which at best would only be beneficial on the margins
|
||||
Snap3DRangePolicy<DeviceType, tile_size_transform_bi, TagPairSNAPTransformBi>
|
||||
policy_transform_bi({0,0,0},{vector_length,idxb_max,chunk_size_div},{vector_length,tile_size_transform_bi,1});
|
||||
Kokkos::parallel_for("TransformBi",policy_transform_bi,*this);
|
||||
}
|
||||
|
||||
//Note zeroing `ylist` is fused into `TransformUi`.
|
||||
@ -900,25 +892,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
snaKK.compute_bi(iatom,jjb);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
if (idxb >= snaKK.idxb_max) return;
|
||||
|
||||
const int ntriples = snaKK.ntriples;
|
||||
|
||||
for (int itriple = 0; itriple < ntriples; itriple++) {
|
||||
|
||||
const real_type blocal = snaKK.blist_gpu(iatom, idxb, itriple);
|
||||
|
||||
snaKK.blist(iatom, itriple, idxb) = blocal;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1151,9 +1124,10 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
snaKK.compute_bi_cpu(team,ii);
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU, const int& ii) const {
|
||||
const int iatom = ii / snaKK.idxb_max;
|
||||
const int jjb = ii % snaKK.idxb_max;
|
||||
snaKK.compute_bi(iatom, jjb);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
|
||||
@ -244,8 +244,6 @@ class SNAKokkos {
|
||||
void compute_zi_cpu(const int&) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_cpu(int) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int) const; // ForceSNAP
|
||||
|
||||
// functions for derivatives, CPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -294,12 +292,13 @@ class SNAKokkos {
|
||||
t_sna_3d ulisttot_im;
|
||||
t_sna_3c ulisttot; // un-folded ulisttot
|
||||
|
||||
t_sna_3c zlist;
|
||||
t_sna_3d blist;
|
||||
|
||||
t_sna_3d ylist_re;
|
||||
t_sna_3d ylist_im;
|
||||
|
||||
// Structures for the CPU backend only
|
||||
t_sna_3d blist;
|
||||
t_sna_3c_ll zlist;
|
||||
|
||||
t_sna_3c_ll ulist;
|
||||
|
||||
@ -313,9 +312,6 @@ class SNAKokkos {
|
||||
t_sna_3c db_gpu; // `db`
|
||||
t_sna_3d sfac_gpu; // sfac, dsfac_{x,y,z}
|
||||
|
||||
t_sna_3c zlist_gpu;
|
||||
t_sna_3d blist_gpu;
|
||||
|
||||
int idxcg_max, idxu_max, idxu_half_max, idxu_cache_max, idxz_max, idxb_max;
|
||||
|
||||
// Chem snap counts
|
||||
|
||||
@ -317,6 +317,9 @@ void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int
|
||||
MemKK::realloc_kokkos(ulisttot_im,"sna:ulisttot_im", natom_pad, nelements, idxu_half_max);
|
||||
MemKK::realloc_kokkos(ulisttot,"sna:ulisttot", natom_pad, nelements, idxu_max);
|
||||
|
||||
MemKK::realloc_kokkos(zlist,"sna:zlist", natom_pad, ndoubles, idxz_max);
|
||||
MemKK::realloc_kokkos(blist,"sna:blist", natom_pad, ntriples, idxb_max);
|
||||
|
||||
MemKK::realloc_kokkos(ylist_re,"sna:ylist_re", natom_pad, nelements, idxu_half_max);
|
||||
MemKK::realloc_kokkos(ylist_im,"sna:ylist_im", natom_pad, nelements, idxu_half_max);
|
||||
|
||||
@ -327,10 +330,6 @@ void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int
|
||||
MemKK::realloc_kokkos(db_gpu,"sna:db_gpu",natom_pad,nmax,3);
|
||||
MemKK::realloc_kokkos(sfac_gpu,"sna:sfac_gpu",natom_pad,nmax,4);
|
||||
MemKK::realloc_kokkos(ulist,"sna:ulist",1,1,1);
|
||||
MemKK::realloc_kokkos(zlist,"sna:zlist",1,1,1);
|
||||
MemKK::realloc_kokkos(zlist_gpu,"sna:zlist_gpu",natom_pad,idxz_max,ndoubles);
|
||||
MemKK::realloc_kokkos(blist,"sna:blist",natom_pad,ntriples,idxb_max);
|
||||
MemKK::realloc_kokkos(blist_gpu,"sna:blist_gpu",natom_pad,idxb_max,ntriples);
|
||||
MemKK::realloc_kokkos(dulist,"sna:dulist",1,1,1);
|
||||
} else {
|
||||
MemKK::realloc_kokkos(a_gpu,"sna:a_gpu",1,1);
|
||||
@ -339,10 +338,6 @@ void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int
|
||||
MemKK::realloc_kokkos(db_gpu,"sna:db_gpu",1,1,1);
|
||||
MemKK::realloc_kokkos(sfac_gpu,"sna:sfac_gpu",1,1,1);
|
||||
MemKK::realloc_kokkos(ulist,"sna:ulist",idxu_cache_max,natom_pad,nmax);
|
||||
MemKK::realloc_kokkos(zlist,"sna:zlist",idxz_max,ndoubles,natom_pad);
|
||||
MemKK::realloc_kokkos(zlist_gpu,"sna:zlist_gpu",1,1,1);
|
||||
MemKK::realloc_kokkos(blist,"sna:blist",natom_pad,ntriples,idxb_max);
|
||||
MemKK::realloc_kokkos(blist_gpu,"sna:blist_gpu",1,1,1);
|
||||
MemKK::realloc_kokkos(dulist,"sna:dulist",idxu_cache_max,natom_pad,nmax);
|
||||
}
|
||||
}
|
||||
@ -663,7 +658,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iato
|
||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||
|
||||
zlist_gpu(iatom,jjz,idouble) = evaluate_zi(j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, iatom, elem1, elem2, cgblock);
|
||||
zlist(iatom, idouble, jjz) = evaluate_zi(j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, iatom, elem1, elem2, cgblock);
|
||||
|
||||
idouble++;
|
||||
}
|
||||
@ -711,7 +706,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iato
|
||||
const int jjz_index = jjz+mb*(j+1)+ma;
|
||||
if (2*mb == j) return; // I think we can remove this?
|
||||
const complex utot = ulisttot(iatom, elem3, jju_index);
|
||||
const complex zloc = zlist_gpu(iatom, jjz_index, idouble);
|
||||
const complex zloc = zlist(iatom, idouble, jjz_index);
|
||||
sumzu_temp += utot.re * zloc.re + utot.im * zloc.im;
|
||||
}
|
||||
}
|
||||
@ -727,7 +722,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iato
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
|
||||
const complex utot = ulisttot(iatom, elem3, jju_index);
|
||||
const complex zloc = zlist_gpu(iatom, jjz_index, idouble);
|
||||
const complex zloc = zlist(iatom, idouble, jjz_index);
|
||||
sumzu_temp += utot.re * zloc.re + utot.im * zloc.im;
|
||||
|
||||
}
|
||||
@ -738,7 +733,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iato
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
|
||||
const complex utot = ulisttot(iatom, elem3, jju_index);
|
||||
const complex zloc = zlist_gpu(iatom, jjz_index, idouble);
|
||||
const complex zloc = zlist(iatom, idouble, jjz_index);
|
||||
sumzu += static_cast<real_type>(0.5) * (utot.re * zloc.re + utot.im * zloc.im);
|
||||
} // end if jeven
|
||||
|
||||
@ -752,7 +747,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iato
|
||||
sumzu -= bzero[j];
|
||||
}
|
||||
}
|
||||
blist_gpu(iatom, jjb, itriple) = sumzu;
|
||||
blist(iatom, itriple, jjb) = sumzu;
|
||||
//} // end loop over j
|
||||
//} // end loop over j1, j2
|
||||
itriple++;
|
||||
@ -818,7 +813,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_with_zlist(cons
|
||||
int idouble = 0;
|
||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||
const complex ztmp = zlist_gpu(iatom,jjz,idouble);
|
||||
const complex ztmp = zlist(iatom, idouble, jjz);
|
||||
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
|
||||
// find right y_list[jju] and beta(iatom,jjb) entries
|
||||
// multiply and divide by j+1 factors
|
||||
@ -1240,144 +1235,12 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int&
|
||||
|
||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||
zlist(jjz, idouble, iatom).re = static_cast<real_type>(0.0);
|
||||
zlist(jjz, idouble, iatom).im = static_cast<real_type>(0.0);
|
||||
|
||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||
int icgb = mb1min*(j2+1) + mb2max;
|
||||
for (int ib = 0; ib < nb; ib++) {
|
||||
|
||||
real_type suma1_r = static_cast<real_type>(0.0);
|
||||
real_type suma1_i = static_cast<real_type>(0.0);
|
||||
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
int icga = ma1min * (j2 + 1) + ma2max;
|
||||
for (int ia = 0; ia < na; ia++) {
|
||||
suma1_r += cgblock[icga] * (ulisttot(iatom, elem1, jju1+ma1).re * ulisttot(iatom, elem2, jju2+ma2).re - ulisttot(iatom, elem1, jju1+ma1).im * ulisttot(iatom, elem2, jju2+ma2).im);
|
||||
suma1_i += cgblock[icga] * (ulisttot(iatom, elem1, jju1+ma1).re * ulisttot(iatom, elem2, jju2+ma2).im + ulisttot(iatom, elem1, jju1+ma1).im * ulisttot(iatom, elem2, jju2+ma2).re);
|
||||
ma1++;
|
||||
ma2--;
|
||||
icga += j2;
|
||||
} // end loop over ia
|
||||
|
||||
zlist(jjz, idouble, iatom).re += cgblock[icgb] * suma1_r;
|
||||
zlist(jjz, idouble, iatom).im += cgblock[icgb] * suma1_i;
|
||||
|
||||
jju1 += j1 + 1;
|
||||
jju2 -= j2 + 1;
|
||||
icgb += j2;
|
||||
} // end loop over ib
|
||||
|
||||
if (bnorm_flag) {
|
||||
const real_type scale = static_cast<real_type>(1) / static_cast<real_type>(j + 1);
|
||||
zlist(jjz, idouble, iatom).re *= scale;
|
||||
zlist(jjz, idouble, iatom).im *= scale;
|
||||
}
|
||||
zlist(iatom, idouble, jjz) = evaluate_zi(j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, iatom, elem1, elem2, cgblock);
|
||||
idouble++;
|
||||
} // end loop over elem2
|
||||
} // end loop over elem1
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute Bi by summing conj(Ui)*Zi, CPU version
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom) const
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
// for j = |j1-j2|,Min(twojmax,j1+j2),2
|
||||
// b(j1,j2,j) = 0
|
||||
// for mb = 0,...,jmid
|
||||
// for ma = 0,...,j
|
||||
// b(j1,j2,j) +=
|
||||
// 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb)
|
||||
|
||||
int itriple = 0;
|
||||
int idouble = 0;
|
||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||
int jalloy = idouble; // must be non-const to work around gcc compiler bug
|
||||
for (int elem3 = 0; elem3 < nelements; elem3++) {
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxb_max),
|
||||
[&] (const int& jjb) {
|
||||
const int j1 = idxb(jjb, 0);
|
||||
const int j2 = idxb(jjb, 1);
|
||||
int j = idxb(jjb, 2); // removed "const" to work around GCC 7 bug
|
||||
|
||||
int jjz = idxz_block(j1, j2, j);
|
||||
int jju = idxu_block[j];
|
||||
real_type sumzu = static_cast<real_type>(0.0);
|
||||
real_type sumzu_temp = static_cast<real_type>(0.0);
|
||||
const int bound = (j+2)/2;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound),
|
||||
[&] (const int mbma, real_type& sum) {
|
||||
const int ma = mbma % (j + 1);
|
||||
const int mb = mbma / (j + 1);
|
||||
const int jju_index = jju + mb * (j + 1) + ma;
|
||||
const int jjz_index = jjz + mb * (j + 1) + ma;
|
||||
if (2*mb == j) return;
|
||||
sum +=
|
||||
ulisttot(iatom, elem3, jju_index).re * zlist(jjz_index, jalloy, iatom).re +
|
||||
ulisttot(iatom, elem3, jju_index).im * zlist(jjz_index, jalloy, iatom).im;
|
||||
},sumzu_temp); // end loop over ma, mb
|
||||
sumzu += sumzu_temp;
|
||||
|
||||
// For j even, special treatment for middle column
|
||||
|
||||
if (j%2 == 0) {
|
||||
int mb = j/2; // removed "const" to work around GCC 7 bug
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb),
|
||||
[&] (const int ma, real_type& sum) {
|
||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
sum +=
|
||||
ulisttot(iatom, elem3, jju_index).re * zlist(jjz_index, jalloy, iatom).re +
|
||||
ulisttot(iatom, elem3, jju_index).im * zlist(jjz_index, jalloy, iatom).im;
|
||||
},sumzu_temp); // end loop over ma
|
||||
sumzu += sumzu_temp;
|
||||
|
||||
const int ma = mb;
|
||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
sumzu += static_cast<real_type>(0.5)*
|
||||
(ulisttot(iatom, elem3, jju_index).re * zlist(jjz_index, jalloy, iatom).re +
|
||||
ulisttot(iatom, elem3, jju_index).im * zlist(jjz_index, jalloy, iatom).im);
|
||||
} // end if jeven
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
sumzu *= static_cast<real_type>(2.0);
|
||||
|
||||
// apply bzero shift
|
||||
|
||||
if (bzero_flag) {
|
||||
if (!wselfall_flag) {
|
||||
if (elem1 == elem2 && elem1 == elem3) {
|
||||
sumzu -= bzero[j];
|
||||
}
|
||||
} else {
|
||||
sumzu -= bzero[j];
|
||||
}
|
||||
}
|
||||
|
||||
blist(iatom, itriple, jjb) = sumzu;
|
||||
});
|
||||
});
|
||||
//} // end loop over j
|
||||
//} // end loop over j1, j2
|
||||
itriple++;
|
||||
}
|
||||
idouble++;
|
||||
} // end loop over elem2
|
||||
} // end loop over elem1
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute Yi from Ui without storing Zi, looping over zlist indices,
|
||||
CPU version
|
||||
@ -2295,6 +2158,9 @@ double SNAKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||
bytes += MemKK::memory_usage(ulisttot_im);
|
||||
bytes += MemKK::memory_usage(ulisttot);
|
||||
|
||||
bytes += MemKK::memory_usage(zlist);
|
||||
bytes += MemKK::memory_usage(blist);
|
||||
|
||||
bytes += MemKK::memory_usage(ylist_re);
|
||||
bytes += MemKK::memory_usage(ylist_im);
|
||||
|
||||
@ -2304,15 +2170,8 @@ double SNAKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||
bytes += MemKK::memory_usage(da_gpu);
|
||||
bytes += MemKK::memory_usage(db_gpu);
|
||||
bytes += MemKK::memory_usage(sfac_gpu);
|
||||
|
||||
bytes += MemKK::memory_usage(zlist_gpu);
|
||||
bytes += MemKK::memory_usage(blist_gpu);
|
||||
} else {
|
||||
bytes += MemKK::memory_usage(ulist);
|
||||
|
||||
bytes += MemKK::memory_usage(zlist);
|
||||
bytes += MemKK::memory_usage(blist);
|
||||
|
||||
bytes += MemKK::memory_usage(dulist);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user