mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-19 08:17:05 +01:00
Make view specify where and drive data motion - first cut.
This is a compile-time option: --enable-unified=yes/no
This commit is contained in:
@ -233,10 +233,10 @@ public:
|
||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||
}
|
||||
|
||||
auto U_v = U.View();
|
||||
auto Uds_v = Uds.View();
|
||||
auto Uconj_v = Uconj.View();
|
||||
auto Utmp_v= Utmp.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto Uds_v = Uds.View(CpuWrite);
|
||||
auto Uconj_v = Uconj.View(CpuRead);
|
||||
auto Utmp_v= Utmp.View(CpuWrite);
|
||||
thread_foreach(ss,U_v,{
|
||||
Uds_v[ss](0)(mu) = U_v[ss]();
|
||||
Uds_v[ss](1)(mu) = Uconj_v[ss]();
|
||||
@ -272,8 +272,8 @@ public:
|
||||
GaugeLinkField link(mat.Grid());
|
||||
// use lorentz for flavour as hack.
|
||||
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
||||
auto link_v = link.View();
|
||||
auto tmp_v = tmp.View();
|
||||
auto link_v = link.View(CpuWrite);
|
||||
auto tmp_v = tmp.View(CpuRead);
|
||||
thread_foreach(ss,tmp_v,{
|
||||
link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1));
|
||||
});
|
||||
@ -306,9 +306,9 @@ public:
|
||||
|
||||
GaugeLinkField tmp(mat.Grid());
|
||||
tmp = Zero();
|
||||
auto tmp_v = tmp.View();
|
||||
auto Atilde_v = Atilde.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
auto tmp_v = tmp.View(CpuWrite);
|
||||
auto Atilde_v = Atilde.View(CpuRead);
|
||||
auto Btilde_v = Btilde.View(CpuRead);
|
||||
thread_for(ss,tmp.Grid()->oSites(),{
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
int sF = s + Ls * ss;
|
||||
|
@ -264,8 +264,8 @@ private:
|
||||
{
|
||||
CloverFieldType T(F.Grid());
|
||||
T = Zero();
|
||||
auto T_v = T.View();
|
||||
auto F_v = F.View();
|
||||
auto T_v = T.View(CpuWrite);
|
||||
auto F_v = F.View(CpuRead);
|
||||
thread_for(i, CloverTerm.Grid()->oSites(),
|
||||
{
|
||||
T_v[i]()(0, 1) = timesMinusI(F_v[i]()());
|
||||
@ -282,8 +282,8 @@ private:
|
||||
CloverFieldType T(F.Grid());
|
||||
T = Zero();
|
||||
|
||||
auto T_v = T.View();
|
||||
auto F_v = F.View();
|
||||
auto T_v = T.View(CpuWrite);
|
||||
auto F_v = F.View(CpuRead);
|
||||
thread_for(i, CloverTerm.Grid()->oSites(),
|
||||
{
|
||||
T_v[i]()(0, 1) = -F_v[i]()();
|
||||
@ -300,8 +300,8 @@ private:
|
||||
CloverFieldType T(F.Grid());
|
||||
T = Zero();
|
||||
|
||||
auto T_v = T.View();
|
||||
auto F_v = F.View();
|
||||
auto T_v = T.View(CpuWrite);
|
||||
auto F_v = F.View(CpuRead);
|
||||
thread_for(i, CloverTerm.Grid()->oSites(),
|
||||
{
|
||||
T_v[i]()(0, 0) = timesMinusI(F_v[i]()());
|
||||
@ -318,8 +318,8 @@ private:
|
||||
CloverFieldType T(F.Grid());
|
||||
T = Zero();
|
||||
|
||||
auto T_v = T.View();
|
||||
auto F_v = F.View();
|
||||
auto T_v = T.View(CpuWrite);
|
||||
auto F_v = F.View(CpuRead);
|
||||
thread_for(i, CloverTerm.Grid()->oSites(),
|
||||
{
|
||||
T_v[i]()(0, 1) = timesI(F_v[i]()());
|
||||
@ -336,8 +336,8 @@ private:
|
||||
CloverFieldType T(F.Grid());
|
||||
T = Zero();
|
||||
|
||||
auto T_v = T.View();
|
||||
auto F_v = F.View();
|
||||
auto T_v = T.View(CpuWrite);
|
||||
auto F_v = F.View(CpuRead);
|
||||
thread_for(i, CloverTerm.Grid()->oSites(),
|
||||
{
|
||||
T_v[i]()(0, 1) = -(F_v[i]()());
|
||||
@ -355,8 +355,8 @@ private:
|
||||
|
||||
T = Zero();
|
||||
|
||||
auto T_v = T.View();
|
||||
auto F_v = F.View();
|
||||
auto T_v = T.View(CpuWrite);
|
||||
auto F_v = F.View(CpuRead);
|
||||
thread_for(i, CloverTerm.Grid()->oSites(),
|
||||
{
|
||||
T_v[i]()(0, 0) = timesI(F_v[i]()());
|
||||
|
@ -106,9 +106,9 @@ public:
|
||||
const _SpinorField & phi,
|
||||
int mu)
|
||||
{
|
||||
auto out_v= out.View();
|
||||
auto phi_v= phi.View();
|
||||
auto Umu_v= Umu.View();
|
||||
auto out_v= out.View(CpuWrite);
|
||||
auto phi_v= phi.View(CpuRead);
|
||||
auto Umu_v= Umu.View(CpuRead);
|
||||
thread_for(sss,out.Grid()->oSites(),{
|
||||
multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu);
|
||||
});
|
||||
@ -191,9 +191,9 @@ public:
|
||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||
GaugeLinkField tmp(mat.Grid());
|
||||
tmp = Zero();
|
||||
auto tmp_v = tmp.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
auto Atilde_v = Atilde.View();
|
||||
auto tmp_v = tmp.View(CpuWrite);
|
||||
auto Btilde_v = Btilde.View(CpuRead);
|
||||
auto Atilde_v = Atilde.View(CpuRead);
|
||||
thread_for(sss,tmp.Grid()->oSites(),{
|
||||
int sU=sss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
|
@ -50,9 +50,9 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
auto pdiag = &diag[0];
|
||||
@ -93,9 +93,9 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
{
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
auto pdiag = &diag[0];
|
||||
@ -131,8 +131,8 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
int Ls=this->Ls;
|
||||
|
||||
@ -193,8 +193,8 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
|
||||
GridBase *grid=psi_i.Grid();
|
||||
int Ls=this->Ls;
|
||||
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
auto plee = & lee [0];
|
||||
auto pdee = & dee [0];
|
||||
|
@ -65,9 +65,9 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(CpuRead);
|
||||
auto phi = phi_i.View(CpuRead);
|
||||
auto chi = chi_i.View(CpuWrite);
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd= Simd::Nsimd();
|
||||
@ -213,9 +213,9 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi=psi_i.View();
|
||||
auto phi=phi_i.View();
|
||||
auto chi=chi_i.View();
|
||||
auto psi=psi_i.View(CpuRead);
|
||||
auto phi=phi_i.View(CpuRead);
|
||||
auto chi=chi_i.View(CpuWrite);
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd= Simd::Nsimd();
|
||||
@ -357,8 +357,8 @@ CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi_i, FermionField
|
||||
Vector<iSinglet<Simd> > &Matm)
|
||||
{
|
||||
EnableIf<Impl::LsVectorised&&EnableBool,int> sfinae=0;
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(CpuRead);
|
||||
auto chi = chi_i.View(CpuWrite);
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
@ -535,8 +535,8 @@ CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField
|
||||
EnableIf<Impl::LsVectorised,int> sfinae=0;
|
||||
#ifndef AVX512
|
||||
{
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(CpuRead);
|
||||
auto chi = chi_i.View(CpuWrite);
|
||||
|
||||
SiteHalfSpinor BcastP;
|
||||
SiteHalfSpinor BcastM;
|
||||
@ -586,8 +586,8 @@ CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField
|
||||
}
|
||||
#else
|
||||
{
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(CpuRead);
|
||||
auto chi = chi_i.View(CpuWrite);
|
||||
// pointers
|
||||
// MASK_REGS;
|
||||
#define Chi_00 %zmm0
|
||||
|
@ -46,9 +46,9 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
int Ls = this->Ls;
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto phi = phi_i.View();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
auto pdiag = &diag[0];
|
||||
auto pupper = &upper[0];
|
||||
@ -82,9 +82,9 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
|
||||
GridBase* grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
auto pdiag = &diag[0];
|
||||
auto pupper = &upper[0];
|
||||
@ -116,8 +116,8 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi=psi_i.View();
|
||||
auto chi=chi_i.View();
|
||||
auto psi=psi_i.View(AcceleratorRead);
|
||||
auto chi=chi_i.View(AcceleratorWrite);
|
||||
int Ls = this->Ls;
|
||||
|
||||
auto plee = & this->lee[0];
|
||||
@ -172,8 +172,8 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, Fermion
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
int Ls = this->Ls;
|
||||
|
||||
auto plee = & this->lee[0];
|
||||
|
@ -221,10 +221,10 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
|
||||
|
||||
Compressor compressor;
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
auto Umu_v = Umu.View();
|
||||
auto UUUmu_v = UUUmu.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto Umu_v = Umu.View(CpuRead);
|
||||
auto UUUmu_v = UUUmu.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
thread_for( ss,Umu.Grid()->oSites(),{
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
@ -339,10 +339,10 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
||||
}
|
||||
|
||||
// do the compute
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
|
||||
if (dag == DaggerYes) {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
@ -376,10 +376,10 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
||||
|
||||
DhopComputeTime2-=usecond();
|
||||
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
int sz=st.surface_list.size();
|
||||
thread_for( ss,sz,{
|
||||
@ -418,10 +418,10 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
thread_for( ss,U.Grid()->oSites(),{
|
||||
int sU=ss;
|
||||
|
@ -250,10 +250,10 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
|
||||
////////////////////////
|
||||
// Call the single hop
|
||||
////////////////////////
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto B_v = B.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto B_v = B.View(CpuWrite);
|
||||
auto Btilde_v = Btilde.View(CpuWrite);
|
||||
thread_for(sss,B.Grid()->oSites(),{
|
||||
Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
|
||||
});
|
||||
@ -378,10 +378,10 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
|
||||
|
||||
Compressor compressor;
|
||||
Stencil.HaloExchange(in, compressor);
|
||||
auto Umu_v = Umu.View();
|
||||
auto UUUmu_v = UUUmu.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto Umu_v = Umu.View(CpuRead);
|
||||
auto UUUmu_v = UUUmu.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
thread_for( sss, in.Grid()->oSites(),{
|
||||
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
|
||||
});
|
||||
@ -449,10 +449,10 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
||||
}
|
||||
|
||||
// do the compute
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
int sU = ss;
|
||||
@ -479,10 +479,10 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
||||
|
||||
DhopComputeTime2 -= usecond();
|
||||
{
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
int sz=st.surface_list.size();
|
||||
thread_for(ss,sz,{
|
||||
@ -520,10 +520,10 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
|
||||
st.HaloExchange(in, compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
DhopComputeTime -= usecond();
|
||||
if (dag == DaggerYes) {
|
||||
thread_for(sss, in.Grid()->oSites(),{
|
||||
|
@ -44,9 +44,9 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
@ -84,9 +84,9 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
auto pm = this->pm;
|
||||
int shift_s = (pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
@ -132,9 +132,9 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
@ -174,9 +174,9 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto phi = phi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
@ -226,8 +226,8 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
auto plee = & this->lee [0];
|
||||
auto pdee = & this->dee [0];
|
||||
@ -286,8 +286,8 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
auto pm = this->pm;
|
||||
auto plee = & this->lee [0];
|
||||
@ -354,8 +354,8 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
|
||||
auto plee = & this->lee [0];
|
||||
auto pdee = & this->dee [0];
|
||||
@ -410,8 +410,8 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
auto psi = psi_i.View(AcceleratorRead);
|
||||
auto chi = chi_i.View(AcceleratorWrite);
|
||||
int Ls = this->Ls;
|
||||
|
||||
auto pm = this->pm;
|
||||
|
@ -475,12 +475,12 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
// Inefficient comms method but not performance critical.
|
||||
tmp1 = Cshift(q_in_1, mu, 1);
|
||||
tmp2 = Cshift(q_in_2, mu, 1);
|
||||
auto tmp1_v = tmp1.View();
|
||||
auto tmp2_v = tmp2.View();
|
||||
auto q_in_1_v=q_in_1.View();
|
||||
auto q_in_2_v=q_in_2.View();
|
||||
auto q_out_v = q_out.View();
|
||||
auto Umu_v = Umu.View();
|
||||
auto tmp1_v = tmp1.View(CpuWrite);
|
||||
auto tmp2_v = tmp2.View(CpuWrite);
|
||||
auto q_in_1_v=q_in_1.View(CpuRead);
|
||||
auto q_in_2_v=q_in_2.View(CpuRead);
|
||||
auto q_out_v = q_out.View(CpuRead);
|
||||
auto Umu_v = Umu.View(CpuRead);
|
||||
thread_for(sU, Umu.Grid()->oSites(),{
|
||||
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
|
||||
q_in_2_v[sU],
|
||||
@ -526,11 +526,11 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
tmp = lattice_cmplx*q_in;
|
||||
tmpBwd = Cshift(tmp, mu, -1);
|
||||
|
||||
auto coords_v = coords.View();
|
||||
auto tmpFwd_v = tmpFwd.View();
|
||||
auto tmpBwd_v = tmpBwd.View();
|
||||
auto Umu_v = Umu.View();
|
||||
auto q_out_v = q_out.View();
|
||||
auto coords_v = coords.View(CpuRead);
|
||||
auto tmpFwd_v = tmpFwd.View(CpuRead);
|
||||
auto tmpBwd_v = tmpBwd.View(CpuRead);
|
||||
auto Umu_v = Umu.View(CpuRead);
|
||||
auto q_out_v = q_out.View(CpuWrite);
|
||||
|
||||
thread_for(sU, Umu.Grid()->oSites(), {
|
||||
|
||||
|
@ -348,18 +348,18 @@ template <class Impl>
|
||||
void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
||||
int Nsite, const FermionField &in, std::vector<FermionField> &out)
|
||||
{
|
||||
auto U_v = U.View();
|
||||
auto in_v = in.View();
|
||||
auto st_v = st.View();
|
||||
auto U_v = U.View(AcceleratorRead);
|
||||
auto in_v = in.View(AcceleratorRead);
|
||||
auto st_v = st.View(AcceleratorRead);
|
||||
|
||||
auto out_Xm = out[0].View();
|
||||
auto out_Ym = out[1].View();
|
||||
auto out_Zm = out[2].View();
|
||||
auto out_Tm = out[3].View();
|
||||
auto out_Xp = out[4].View();
|
||||
auto out_Yp = out[5].View();
|
||||
auto out_Zp = out[6].View();
|
||||
auto out_Tp = out[7].View();
|
||||
auto out_Xm = out[0].View(AcceleratorWrite);
|
||||
auto out_Ym = out[1].View(AcceleratorWrite);
|
||||
auto out_Zm = out[2].View(AcceleratorWrite);
|
||||
auto out_Tm = out[3].View(AcceleratorWrite);
|
||||
auto out_Xp = out[4].View(AcceleratorWrite);
|
||||
auto out_Yp = out[5].View(AcceleratorWrite);
|
||||
auto out_Zp = out[6].View(AcceleratorWrite);
|
||||
auto out_Tp = out[7].View(AcceleratorWrite);
|
||||
auto CBp=st.CommBuf();
|
||||
accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{
|
||||
int sU=sss/Ls;
|
||||
@ -383,10 +383,10 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
||||
assert(dirdisp<=7);
|
||||
assert(dirdisp>=0);
|
||||
|
||||
auto U_v = U.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto st_v = st.View();
|
||||
auto U_v = U.View(AcceleratorRead);
|
||||
auto in_v = in.View(AcceleratorRead);
|
||||
auto out_v = out.View(AcceleratorWrite);
|
||||
auto st_v = st.View(AcceleratorRead);
|
||||
auto CBp=st.CommBuf();
|
||||
#define LoopBody(Dir) \
|
||||
case Dir : \
|
||||
@ -438,10 +438,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
auto U_v = U.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto st_v = st.View();
|
||||
auto U_v = U.View(AcceleratorRead);
|
||||
auto in_v = in.View(AcceleratorRead);
|
||||
auto out_v = out.View(AcceleratorWrite);
|
||||
auto st_v = st.View(AcceleratorRead);
|
||||
|
||||
if( interior && exterior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
||||
@ -469,10 +469,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
auto U_v = U.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
auto st_v = st.View();
|
||||
auto U_v = U.View(AcceleratorRead);
|
||||
auto in_v = in.View(AcceleratorRead);
|
||||
auto out_v = out.View(AcceleratorWrite);
|
||||
auto st_v = st.View(AcceleratorRead);
|
||||
|
||||
if( interior && exterior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;}
|
||||
|
@ -86,8 +86,8 @@ public:
|
||||
|
||||
// Move this elsewhere? FIXME
|
||||
static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W
|
||||
auto U_v = U.View();
|
||||
auto W_v = W.View();
|
||||
auto U_v = U.View(CpuWrite);
|
||||
auto W_v = W.View(CpuRead);
|
||||
thread_for( ss, U.Grid()->oSites(), {
|
||||
U_v[ss](mu) = U_v[ss](mu) + W_v[ss]();
|
||||
});
|
||||
@ -131,8 +131,8 @@ public:
|
||||
//static std::chrono::duration<double> diff;
|
||||
|
||||
//auto start = std::chrono::high_resolution_clock::now();
|
||||
auto U_v = U.View();
|
||||
auto P_v = P.View();
|
||||
auto U_v = U.View(CpuWrite);
|
||||
auto P_v = P.View(CpuRead);
|
||||
thread_for(ss, P.Grid()->oSites(),{
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu));
|
||||
|
@ -89,8 +89,8 @@ public:
|
||||
action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
|
||||
|
||||
|
||||
auto p_v = p.View();
|
||||
auto action_v = action.View();
|
||||
auto p_v = p.View(CpuRead);
|
||||
auto action_v = action.View(CpuWrite);
|
||||
for (int mu = 0; mu < Ndim; mu++)
|
||||
{
|
||||
// pshift = Cshift(p, mu, +1); // not efficient, implement with stencils
|
||||
@ -146,8 +146,8 @@ public:
|
||||
for (int point = 0; point < npoint; point++)
|
||||
{
|
||||
|
||||
auto p_v = p.View();
|
||||
auto force_v = force.View();
|
||||
auto p_v = p.View(CpuRead);
|
||||
auto force_v = force.View(CpuWrite);
|
||||
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
|
@ -49,7 +49,7 @@ public:
|
||||
|
||||
private:
|
||||
const unsigned int smearingLevels;
|
||||
Smear_Stout<Gimpl> StoutSmearing;
|
||||
Smear_Stout<Gimpl> *StoutSmearing;
|
||||
std::vector<GaugeField> SmearedSet;
|
||||
|
||||
// Member functions
|
||||
@ -72,7 +72,7 @@ private:
|
||||
previous_u = *ThinLinks;
|
||||
for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl)
|
||||
{
|
||||
StoutSmearing.smear(SmearedSet[smearLvl], previous_u);
|
||||
StoutSmearing->smear(SmearedSet[smearLvl], previous_u);
|
||||
previous_u = SmearedSet[smearLvl];
|
||||
|
||||
// For debug purposes
|
||||
@ -93,7 +93,7 @@ private:
|
||||
GaugeLinkField SigmaKPrime_mu(grid);
|
||||
GaugeLinkField GaugeKmu(grid), Cmu(grid);
|
||||
|
||||
StoutSmearing.BaseSmear(C, GaugeK);
|
||||
StoutSmearing->BaseSmear(C, GaugeK);
|
||||
SigmaK = Zero();
|
||||
iLambda = Zero();
|
||||
|
||||
@ -107,7 +107,7 @@ private:
|
||||
pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
|
||||
pokeLorentz(iLambda, iLambda_mu, mu);
|
||||
}
|
||||
StoutSmearing.derivative(SigmaK, iLambda,
|
||||
StoutSmearing->derivative(SigmaK, iLambda,
|
||||
GaugeK); // derivative of SmearBase
|
||||
return SigmaK;
|
||||
}
|
||||
@ -144,14 +144,14 @@ private:
|
||||
// Exponential
|
||||
iQ2 = iQ * iQ;
|
||||
iQ3 = iQ * iQ2;
|
||||
StoutSmearing.set_uw(u, w, iQ2, iQ3);
|
||||
StoutSmearing.set_fj(f0, f1, f2, u, w);
|
||||
StoutSmearing->set_uw(u, w, iQ2, iQ3);
|
||||
StoutSmearing->set_fj(f0, f1, f2, u, w);
|
||||
e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2;
|
||||
|
||||
// Getting B1, B2, Gamma and Lambda
|
||||
// simplify this part, reduntant calculations in set_fj
|
||||
xi0 = StoutSmearing.func_xi0(w);
|
||||
xi1 = StoutSmearing.func_xi1(w);
|
||||
xi0 = StoutSmearing->func_xi0(w);
|
||||
xi1 = StoutSmearing->func_xi1(w);
|
||||
u2 = u * u;
|
||||
w2 = w * w;
|
||||
cosw = cos(w);
|
||||
@ -219,7 +219,7 @@ public:
|
||||
/* Standard constructor */
|
||||
SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
|
||||
Smear_Stout<Gimpl>& Stout)
|
||||
: smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL)
|
||||
: smearingLevels(Nsmear), StoutSmearing(&Stout), ThinLinks(NULL)
|
||||
{
|
||||
for (unsigned int i = 0; i < smearingLevels; ++i)
|
||||
SmearedSet.push_back(*(new GaugeField(UGrid)));
|
||||
@ -227,7 +227,7 @@ public:
|
||||
|
||||
/*! For just thin links */
|
||||
SmearedConfiguration()
|
||||
: smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
|
||||
: smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {}
|
||||
|
||||
// attach the smeared routines to the thin links U and fill the smeared set
|
||||
void set_Field(GaugeField &U)
|
||||
|
@ -185,13 +185,13 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
|
||||
|
||||
for(int i=0;i<Lblock;i++){
|
||||
|
||||
auto lhs_v = lhs_wi[i].View();
|
||||
auto lhs_v = lhs_wi[i].View(CpuRead);
|
||||
auto left = conjugate(lhs_v[ss]);
|
||||
|
||||
for(int j=0;j<Rblock;j++){
|
||||
|
||||
SpinMatrix_v vv;
|
||||
auto rhs_v = rhs_vj[j].View();
|
||||
auto rhs_v = rhs_vj[j].View(CpuRead);
|
||||
auto right = rhs_v[ss];
|
||||
for(int s1=0;s1<Ns;s1++){
|
||||
for(int s2=0;s2<Ns;s2++){
|
||||
@ -204,7 +204,7 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
|
||||
int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r;
|
||||
for ( int m=0;m<Nmom;m++){
|
||||
int idx = m+base;
|
||||
auto mom_v = mom[m].View();
|
||||
auto mom_v = mom[m].View(CpuRead);
|
||||
auto phase = mom_v[ss];
|
||||
mac(&lvSum[idx],&vv,&phase);
|
||||
}
|
||||
@ -371,7 +371,7 @@ void A2Autils<FImpl>::PionFieldXX(Eigen::Tensor<ComplexD,3> &mat,
|
||||
|
||||
for(int i=0;i<Lblock;i++){
|
||||
|
||||
auto wi_v = wi[i].View();
|
||||
auto wi_v = wi[i].View(CpuRead);
|
||||
auto w = conjugate(wi_v[ss]);
|
||||
if (g5) {
|
||||
w()(2)(0) = - w()(2)(0);
|
||||
@ -383,7 +383,7 @@ void A2Autils<FImpl>::PionFieldXX(Eigen::Tensor<ComplexD,3> &mat,
|
||||
}
|
||||
for(int j=0;j<Rblock;j++){
|
||||
|
||||
auto vj_v=vj[j].View();
|
||||
auto vj_v=vj[j].View(CpuRead);
|
||||
auto v = vj_v[ss];
|
||||
auto vv = v()(0)(0);
|
||||
|
||||
@ -518,12 +518,12 @@ void A2Autils<FImpl>::PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
|
||||
|
||||
for(int i=0;i<Lblock;i++){
|
||||
|
||||
auto wi_v = wi[i].View();
|
||||
auto wi_v = wi[i].View(CpuRead);
|
||||
auto w = conjugate(wi_v[ss]);
|
||||
|
||||
for(int j=0;j<Rblock;j++){
|
||||
|
||||
auto vj_v = vj[j].View();
|
||||
auto vj_v = vj[j].View(CpuRead);
|
||||
auto v = vj_v[ss];
|
||||
|
||||
auto vv = w()(0)(0) * v()(0)(0)// Gamma5 Dirac basis explicitly written out
|
||||
@ -544,7 +544,7 @@ void A2Autils<FImpl>::PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
|
||||
int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r;
|
||||
for ( int m=0;m<Nmom;m++){
|
||||
int idx = m+base;
|
||||
auto mom_v = mom[m].View();
|
||||
auto mom_v = mom[m].View(CpuRead);
|
||||
auto phase = mom_v[ss];
|
||||
mac(&lvSum[idx],&vv,&phase()()());
|
||||
}
|
||||
@ -730,13 +730,13 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
|
||||
|
||||
for(int i=0;i<Lblock;i++)
|
||||
{
|
||||
auto wi_v = lhs_wi[i].View();
|
||||
auto wi_v = lhs_wi[i].View(CpuRead);
|
||||
auto left = conjugate(wi_v[ss]);
|
||||
|
||||
for(int j=0;j<Rblock;j++)
|
||||
{
|
||||
SpinMatrix_v vv;
|
||||
auto vj_v = rhs_vj[j].View();
|
||||
auto vj_v = rhs_vj[j].View(CpuRead);
|
||||
auto right = vj_v[ss];
|
||||
|
||||
for(int s1=0;s1<Ns;s1++)
|
||||
@ -752,8 +752,8 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
|
||||
|
||||
for ( int m=0;m<Nem;m++)
|
||||
{
|
||||
auto emB0_v = emB0[m].View();
|
||||
auto emB1_v = emB1[m].View();
|
||||
auto emB0_v = emB0[m].View(CpuRead);
|
||||
auto emB1_v = emB1[m].View(CpuRead);
|
||||
int idx = m+base;
|
||||
auto b0 = emB0_v[ss];
|
||||
auto b1 = emB1_v[ss];
|
||||
@ -1014,12 +1014,12 @@ A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
|
||||
for(int t=0;t<N_t;t++){
|
||||
for(int s=0;s<N_s;s++){
|
||||
auto vs_v = vs[s].View();
|
||||
auto vs_v = vs[s].View(CpuRead);
|
||||
auto tmp1 = vs_v[ss];
|
||||
vobj tmp2 = Zero();
|
||||
vobj tmp3 = Zero();
|
||||
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
|
||||
auto vd_v = vd[d].View();
|
||||
auto vd_v = vd[d].View(CpuRead);
|
||||
Scalar_v coeff = WW_sd(t,s,d);
|
||||
tmp3 = conjugate(vd_v[ss]);
|
||||
mac(&tmp2, &coeff, &tmp3);
|
||||
@ -1067,12 +1067,12 @@ A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
thread_for(ss,grid->oSites(),{
|
||||
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
|
||||
for(int s=0;s<N_s;s++){
|
||||
auto vs_v = vs[s].View();
|
||||
auto vs_v = vs[s].View(CpuRead);
|
||||
auto tmp1 = vs_v[ss];
|
||||
vobj tmp2 = Zero();
|
||||
vobj tmp3 = Zero();
|
||||
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
|
||||
auto vd_v = vd[d].View();
|
||||
auto vd_v = vd[d].View(CpuRead);
|
||||
Scalar_v coeff = buf(s,d);
|
||||
tmp3 = conjugate(vd_v[ss]);
|
||||
mac(&tmp2, &coeff, &tmp3);
|
||||
@ -1093,7 +1093,7 @@ inline void A2Autils<FImpl>::OuterProductWWVV(PropagatorField &WWVV,
|
||||
const vobj &rhs,
|
||||
const int Ns, const int ss)
|
||||
{
|
||||
auto WWVV_v = WWVV.View();
|
||||
auto WWVV_v = WWVV.View(CpuWrite);
|
||||
for (int s1 = 0; s1 < Ns; s1++){
|
||||
for (int s2 = 0; s2 < Ns; s2++){
|
||||
WWVV_v[ss]()(s1,s2)(0, 0) += lhs()(s1)(0) * rhs()(s2)(0);
|
||||
@ -1122,10 +1122,10 @@ void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWV
|
||||
|
||||
GridBase *grid = WWVV0.Grid();
|
||||
|
||||
auto WWVV0_v = WWVV0.View();
|
||||
auto WWVV1_v = WWVV1.View();
|
||||
auto O_trtr_v= O_trtr.View();
|
||||
auto O_fig8_v= O_fig8.View();
|
||||
auto WWVV0_v = WWVV0.View(CpuRead);
|
||||
auto WWVV1_v = WWVV1.View(CpuRead);
|
||||
auto O_trtr_v= O_trtr.View(CpuWrite);
|
||||
auto O_fig8_v= O_fig8.View(CpuWrite);
|
||||
thread_for(ss,grid->oSites(),{
|
||||
|
||||
typedef typename ComplexField::vector_object vobj;
|
||||
@ -1166,10 +1166,10 @@ void A2Autils<FImpl>::ContractFourQuarkColourMix(const PropagatorField &WWVV0,
|
||||
|
||||
GridBase *grid = WWVV0.Grid();
|
||||
|
||||
auto WWVV0_v = WWVV0.View();
|
||||
auto WWVV1_v = WWVV1.View();
|
||||
auto O_trtr_v= O_trtr.View();
|
||||
auto O_fig8_v= O_fig8.View();
|
||||
auto WWVV0_v = WWVV0.View(CpuRead);
|
||||
auto WWVV1_v = WWVV1.View(CpuRead);
|
||||
auto O_trtr_v= O_trtr.View(CpuWrite);
|
||||
auto O_fig8_v= O_fig8.View(CpuWrite);
|
||||
|
||||
thread_for(ss,grid->oSites(),{
|
||||
|
||||
|
@ -273,10 +273,10 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
||||
for (int ie=0; ie < 6 ; ie++)
|
||||
wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
|
||||
|
||||
auto vbaryon_corr= baryon_corr.View();
|
||||
auto v1 = q1_left.View();
|
||||
auto v2 = q2_left.View();
|
||||
auto v3 = q3_left.View();
|
||||
auto vbaryon_corr= baryon_corr.View(CpuWrite);
|
||||
auto v1 = q1_left.View(CpuRead);
|
||||
auto v2 = q2_left.View(CpuRead);
|
||||
auto v3 = q3_left.View(CpuRead);
|
||||
|
||||
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||
thread_for(ss,grid->oSites(),{
|
||||
@ -560,10 +560,10 @@ void BaryonUtils<FImpl>::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop,
|
||||
{
|
||||
GridBase *grid = qs_ti.Grid();
|
||||
|
||||
auto vcorr= stn_corr.View();
|
||||
auto vq_loop = qq_loop.View();
|
||||
auto vd_tf = qd_tf.View();
|
||||
auto vs_ti = qs_ti.View();
|
||||
auto vcorr= stn_corr.View(CpuWrite);
|
||||
auto vq_loop = qq_loop.View(CpuRead);
|
||||
auto vd_tf = qd_tf.View(CpuRead);
|
||||
auto vs_ti = qs_ti.View(CpuRead);
|
||||
|
||||
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||
thread_for(ss,grid->oSites(),{
|
||||
@ -597,11 +597,11 @@ void BaryonUtils<FImpl>::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti,
|
||||
{
|
||||
GridBase *grid = qs_ti.Grid();
|
||||
|
||||
auto vcorr= stn_corr.View();
|
||||
auto vq_ti = qq_ti.View();
|
||||
auto vq_tf = qq_tf.View();
|
||||
auto vd_tf = qd_tf.View();
|
||||
auto vs_ti = qs_ti.View();
|
||||
auto vcorr= stn_corr.View(CpuWrite);
|
||||
auto vq_ti = qq_ti.View(CpuRead);
|
||||
auto vq_tf = qq_tf.View(CpuRead);
|
||||
auto vd_tf = qd_tf.View(CpuRead);
|
||||
auto vs_ti = qs_ti.View(CpuRead);
|
||||
|
||||
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||
thread_for(ss,grid->oSites(),{
|
||||
|
@ -47,8 +47,8 @@ void axpibg5x(Lattice<vobj> &z,const Lattice<vobj> &x,Coeff a,Coeff b)
|
||||
GridBase *grid=x.Grid();
|
||||
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
auto x_v = x.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
accelerator_for( ss, x_v.size(),vobj::Nsimd(), {
|
||||
auto tmp = a*x_v(ss) + G5*(b*timesI(x_v(ss)));
|
||||
coalescedWrite(z_v[ss],tmp);
|
||||
@ -63,9 +63,9 @@ void axpby_ssp(Lattice<vobj> &z, Coeff a,const Lattice<vobj> &x,Coeff b,const La
|
||||
conformable(x,z);
|
||||
GridBase *grid=x.Grid();
|
||||
int Ls = grid->_rdimensions[0];
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto y_v = y.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
// FIXME -- need a new class of accelerator_loop to implement this
|
||||
//
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
@ -85,9 +85,9 @@ void ag5xpby_ssp(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,const L
|
||||
GridBase *grid=x.Grid();
|
||||
int Ls = grid->_rdimensions[0];
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto y_v = y.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,vobj::Nsimd(),{
|
||||
uint64_t ss = sss*Ls;
|
||||
@ -104,9 +104,9 @@ void axpbg5y_ssp(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,const L
|
||||
conformable(x,z);
|
||||
GridBase *grid=x.Grid();
|
||||
int Ls = grid->_rdimensions[0];
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto y_v = y.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,vobj::Nsimd(),{
|
||||
@ -125,9 +125,9 @@ void ag5xpbg5y_ssp(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,const
|
||||
GridBase *grid=x.Grid();
|
||||
int Ls = grid->_rdimensions[0];
|
||||
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto y_v = y.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,vobj::Nsimd(),{
|
||||
@ -147,9 +147,9 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co
|
||||
GridBase *grid=x.Grid();
|
||||
int Ls = grid->_rdimensions[0];
|
||||
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto y_v = y.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,vobj::Nsimd(),{
|
||||
uint64_t ss = sss*Ls;
|
||||
@ -168,9 +168,9 @@ void axpby_ssp_pplus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,con
|
||||
conformable(x,z);
|
||||
GridBase *grid=x.Grid();
|
||||
int Ls = grid->_rdimensions[0];
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto y_v = y.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,vobj::Nsimd(),{
|
||||
uint64_t ss = sss*Ls;
|
||||
@ -189,8 +189,8 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
|
||||
conformable(x,z);
|
||||
int Ls = grid->_rdimensions[0];
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
auto x_v = x.View();
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,vobj::Nsimd(),{
|
||||
uint64_t ss = sss*Ls;
|
||||
@ -222,8 +222,8 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
|
||||
static_assert(nbasis % 2 == 0, "");
|
||||
int nb = nbasis / 2;
|
||||
|
||||
auto z_v = z.View();
|
||||
auto x_v = x.View();
|
||||
auto z_v = z.View(AcceleratorWrite);
|
||||
auto x_v = x.View(AcceleratorRead);
|
||||
accelerator_for(ss,grid->oSites(),CComplex::Nsimd(),
|
||||
{
|
||||
for(int n = 0; n < nb; ++n) {
|
||||
|
@ -222,9 +222,9 @@ public:
|
||||
conformable(subgroup, Determinant);
|
||||
int i0, i1;
|
||||
su2SubGroupIndex(i0, i1, su2_index);
|
||||
auto subgroup_v = subgroup.View();
|
||||
auto source_v = source.View();
|
||||
auto Determinant_v = Determinant.View();
|
||||
auto subgroup_v = subgroup.View(CpuWrite);
|
||||
auto source_v = source.View(CpuRead);
|
||||
auto Determinant_v = Determinant.View(CpuWrite);
|
||||
|
||||
thread_for(ss, grid->oSites(), {
|
||||
|
||||
@ -257,8 +257,8 @@ public:
|
||||
su2SubGroupIndex(i0, i1, su2_index);
|
||||
|
||||
dest = 1.0; // start out with identity
|
||||
auto dest_v = dest.View();
|
||||
auto subgroup_v = subgroup.View();
|
||||
auto dest_v = dest.View(CpuWrite);
|
||||
auto subgroup_v = subgroup.View(CpuRead);
|
||||
thread_for(ss, grid->oSites(),
|
||||
{
|
||||
dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0);
|
||||
|
Reference in New Issue
Block a user