mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Hide internal data
This commit is contained in:
parent
8b371ffa94
commit
85771e97e9
@ -276,7 +276,7 @@ void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &
|
|||||||
{
|
{
|
||||||
Meooe5D(psi,this->tmp());
|
Meooe5D(psi,this->tmp());
|
||||||
|
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.Checkerboard() == Odd ) {
|
||||||
this->DhopEO(this->tmp(),chi,DaggerNo);
|
this->DhopEO(this->tmp(),chi,DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(this->tmp(),chi,DaggerNo);
|
this->DhopOE(this->tmp(),chi,DaggerNo);
|
||||||
@ -287,7 +287,7 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
// Apply 4d dslash
|
// Apply 4d dslash
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.Checkerboard() == Odd ) {
|
||||||
this->DhopEO(psi,this->tmp(),DaggerYes);
|
this->DhopEO(psi,this->tmp(),DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(psi,this->tmp(),DaggerYes);
|
this->DhopOE(psi,this->tmp(),DaggerYes);
|
||||||
|
@ -47,32 +47,32 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
{
|
{
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
M5Dcalls++;
|
M5Dcalls++;
|
||||||
M5Dtime-=usecond();
|
M5Dtime-=usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
spProj5m(tmp,psi._odata[ss+s+1]);
|
spProj5m(tmp,psi[ss+s+1]);
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
spProj5p(tmp,psi._odata[ss+Ls-1]);
|
spProj5p(tmp,psi[ss+Ls-1]);
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
} else if ( s==(Ls-1)) {
|
} else if ( s==(Ls-1)) {
|
||||||
spProj5m(tmp,psi._odata[ss+0]);
|
spProj5m(tmp,psi[ss+0]);
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
spProj5p(tmp,psi._odata[ss+s-1]);
|
spProj5p(tmp,psi[ss+s-1]);
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5m(tmp,psi._odata[ss+s+1]);
|
spProj5m(tmp,psi[ss+s+1]);
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
spProj5p(tmp,psi._odata[ss+s-1]);
|
spProj5p(tmp,psi[ss+s-1]);
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -90,33 +90,33 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
|||||||
{
|
{
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
M5Dcalls++;
|
M5Dcalls++;
|
||||||
M5Dtime-=usecond();
|
M5Dtime-=usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
spProj5p(tmp,psi._odata[ss+s+1]);
|
spProj5p(tmp,psi[ss+s+1]);
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
spProj5m(tmp,psi._odata[ss+Ls-1]);
|
spProj5m(tmp,psi[ss+Ls-1]);
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
} else if ( s==(Ls-1)) {
|
} else if ( s==(Ls-1)) {
|
||||||
spProj5p(tmp,psi._odata[ss+0]);
|
spProj5p(tmp,psi[ss+0]);
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
spProj5m(tmp,psi._odata[ss+s-1]);
|
spProj5m(tmp,psi[ss+s-1]);
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5p(tmp,psi._odata[ss+s+1]);
|
spProj5p(tmp,psi[ss+s+1]);
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
spProj5m(tmp,psi._odata[ss+s-1]);
|
spProj5m(tmp,psi[ss+s-1]);
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -130,13 +130,13 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
|
|||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
MooeeInvCalls++;
|
MooeeInvCalls++;
|
||||||
MooeeInvTime-=usecond();
|
MooeeInvTime-=usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
|
|
||||||
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
|
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
|
||||||
// Apply (L^{\prime})^{-1}
|
// Apply (L^{\prime})^{-1}
|
||||||
@ -175,8 +175,8 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
|
|||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
|
|
||||||
assert(psi.checkerboard == psi.checkerboard);
|
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
std::vector<Coeff_t> ueec(Ls);
|
std::vector<Coeff_t> ueec(Ls);
|
||||||
std::vector<Coeff_t> deec(Ls);
|
std::vector<Coeff_t> deec(Ls);
|
||||||
@ -195,7 +195,7 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
|
|||||||
|
|
||||||
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
parallel_for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
|
|
||||||
// Apply (U^{\prime})^{-dagger}
|
// Apply (U^{\prime})^{-dagger}
|
||||||
chi[ss]=psi[ss];
|
chi[ss]=psi[ss];
|
||||||
|
@ -57,7 +57,7 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
|
|||||||
int LLs = psi._grid->_rdimensions[0];
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
int vol = psi._grid->oSites()/LLs;
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
assert(Ls==LLs);
|
assert(Ls==LLs);
|
||||||
|
|
||||||
|
@ -89,7 +89,7 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
|
|||||||
{
|
{
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
// Apply (L^{\prime})^{-1}
|
// Apply (L^{\prime})^{-1}
|
||||||
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
|
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
|
||||||
@ -118,7 +118,7 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
|
|||||||
{
|
{
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
// Apply (U^{\prime})^{-dagger}
|
// Apply (U^{\prime})^{-dagger}
|
||||||
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
|
axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0]
|
||||||
|
@ -68,9 +68,9 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
Vector<iSinglet<Simd> > d(LLs);
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs==nsimd);
|
assert(Ls/LLs==nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -212,9 +212,9 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
|||||||
Vector<iSinglet<Simd> > d(LLs);
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs==nsimd);
|
assert(Ls/LLs==nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -765,7 +765,7 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
|
|||||||
int LLs = psi._grid->_rdimensions[0];
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
int vol = psi._grid->oSites()/LLs;
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
Vector<iSinglet<Simd> > Matp;
|
Vector<iSinglet<Simd> > Matp;
|
||||||
Vector<iSinglet<Simd> > Matm;
|
Vector<iSinglet<Simd> > Matm;
|
||||||
|
@ -146,7 +146,7 @@ void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, F
|
|||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
// Apply 4d dslash
|
// Apply 4d dslash
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.Checkerboard() == Odd ) {
|
||||||
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
||||||
|
@ -46,29 +46,29 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
|||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase* grid = psi._grid;
|
GridBase* grid = psi._grid;
|
||||||
|
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
this->M5Dtime -= usecond();
|
this->M5Dtime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||||
for(int s=0; s<Ls; s++){
|
for(int s=0; s<Ls; s++){
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
if(s==0) {
|
if(s==0) {
|
||||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
spProj5m(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
spProj5p(tmp, psi[ss+Ls-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else if(s==(Ls-1)) {
|
} else if(s==(Ls-1)) {
|
||||||
spProj5m(tmp, psi._odata[ss+0]);
|
spProj5m(tmp, psi[ss+0]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
spProj5p(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
spProj5m(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
spProj5p(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -83,30 +83,30 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
|||||||
{
|
{
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase* grid = psi._grid;
|
GridBase* grid = psi._grid;
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
this->M5Dtime -= usecond();
|
this->M5Dtime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
for(int s=0; s<Ls; s++){
|
for(int s=0; s<Ls; s++){
|
||||||
if(s==0) {
|
if(s==0) {
|
||||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
spProj5p(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
spProj5m(tmp, psi[ss+Ls-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else if(s==(Ls-1)) {
|
} else if(s==(Ls-1)) {
|
||||||
spProj5p(tmp, psi._odata[ss+0]);
|
spProj5p(tmp, psi[ss+0]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
spProj5m(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
spProj5p(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
spProj5m(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -121,15 +121,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
|
|||||||
GridBase* grid = psi._grid;
|
GridBase* grid = psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||||
|
|
||||||
auto tmp1 = psi._odata[0];
|
auto tmp1 = psi[0];
|
||||||
auto tmp2 = psi._odata[0];
|
auto tmp2 = psi[0];
|
||||||
|
|
||||||
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
|
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
|
||||||
// Apply (L^{\prime})^{-1}
|
// Apply (L^{\prime})^{-1}
|
||||||
@ -169,8 +169,8 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionFi
|
|||||||
GridBase* grid = psi._grid;
|
GridBase* grid = psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
assert(psi.checkerboard == psi.checkerboard);
|
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
std::vector<Coeff_t> ueec(Ls);
|
std::vector<Coeff_t> ueec(Ls);
|
||||||
std::vector<Coeff_t> deec(Ls+1);
|
std::vector<Coeff_t> deec(Ls+1);
|
||||||
@ -192,8 +192,8 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionFi
|
|||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||||
|
|
||||||
auto tmp1 = psi._odata[0];
|
auto tmp1 = psi[0];
|
||||||
auto tmp2 = psi._odata[0];
|
auto tmp2 = psi[0];
|
||||||
|
|
||||||
// Apply (U^{\prime})^{-dagger}
|
// Apply (U^{\prime})^{-dagger}
|
||||||
chi[ss] = psi[ss];
|
chi[ss] = psi[ss];
|
||||||
|
@ -58,7 +58,7 @@ void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, Fermion
|
|||||||
int LLs = psi._grid->_rdimensions[0];
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
int vol = psi._grid->oSites()/LLs;
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
assert(Ls==LLs);
|
assert(Ls==LLs);
|
||||||
|
|
||||||
|
@ -83,7 +83,7 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
|
|||||||
{
|
{
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
FermionField tmp(psi._grid);
|
FermionField tmp(psi._grid);
|
||||||
@ -117,7 +117,7 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionFi
|
|||||||
{
|
{
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
FermionField tmp(psi._grid);
|
FermionField tmp(psi._grid);
|
||||||
|
@ -65,9 +65,9 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
|||||||
Vector<iSinglet<Simd> > d(LLs);
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -210,9 +210,9 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
|||||||
Vector<iSinglet<Simd> > d(LLs);
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -536,7 +536,7 @@ void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, Fermion
|
|||||||
int LLs = psi._grid->_rdimensions[0];
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
int vol = psi._grid->oSites()/LLs;
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
Vector<iSinglet<Simd> > Matp;
|
Vector<iSinglet<Simd> > Matp;
|
||||||
Vector<iSinglet<Simd> > Matm;
|
Vector<iSinglet<Simd> > Matm;
|
||||||
|
@ -432,7 +432,7 @@ public:
|
|||||||
// sum across the 5d dimension
|
// sum across the 5d dimension
|
||||||
for (auto v : vres) scalar_object += v;
|
for (auto v : vres) scalar_object += v;
|
||||||
}
|
}
|
||||||
tmp._odata[so].putlane(scalar_object, si);
|
tmp[so].putlane(scalar_object, si);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||||
|
@ -171,21 +171,21 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
return axpy_norm(out, mass, in, out);
|
return axpy_norm(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerYes);
|
Dhop(in, out, DaggerYes);
|
||||||
return axpy_norm(out, mass, in, out);
|
return axpy_norm(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||||
if (in.checkerboard == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerNo);
|
DhopEO(in, out, DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(in, out, DaggerNo);
|
DhopOE(in, out, DaggerNo);
|
||||||
@ -193,7 +193,7 @@ void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField
|
|||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||||
if (in.checkerboard == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerYes);
|
DhopEO(in, out, DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(in, out, DaggerYes);
|
DhopOE(in, out, DaggerYes);
|
||||||
@ -202,27 +202,27 @@ void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionFie
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
typename FermionField::scalar_type scal(mass);
|
typename FermionField::scalar_type scal(mass);
|
||||||
out = scal * in;
|
out = scal * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Mooee(in, out);
|
Mooee(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
out = (1.0 / (mass)) * in;
|
out = (1.0 / (mass)) * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
|
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
|
||||||
FermionField &out) {
|
FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
MooeeInv(in, out);
|
MooeeInv(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -285,7 +285,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionFie
|
|||||||
conformable(U._grid, V._grid);
|
conformable(U._grid, V._grid);
|
||||||
conformable(U._grid, mat._grid);
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
mat.checkerboard = U.checkerboard;
|
mat.Checkerboard() = U.Checkerboard();
|
||||||
|
|
||||||
DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag);
|
DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag);
|
||||||
}
|
}
|
||||||
@ -297,9 +297,9 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionF
|
|||||||
conformable(U._grid, V._grid);
|
conformable(U._grid, V._grid);
|
||||||
conformable(U._grid, mat._grid);
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
assert(V.checkerboard == Even);
|
assert(V.Checkerboard() == Even);
|
||||||
assert(U.checkerboard == Odd);
|
assert(U.Checkerboard() == Odd);
|
||||||
mat.checkerboard = Odd;
|
mat.Checkerboard() = Odd;
|
||||||
|
|
||||||
DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag);
|
DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag);
|
||||||
}
|
}
|
||||||
@ -311,9 +311,9 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionF
|
|||||||
conformable(U._grid, V._grid);
|
conformable(U._grid, V._grid);
|
||||||
conformable(U._grid, mat._grid);
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
assert(V.checkerboard == Odd);
|
assert(V.Checkerboard() == Odd);
|
||||||
assert(U.checkerboard == Even);
|
assert(U.Checkerboard() == Even);
|
||||||
mat.checkerboard = Even;
|
mat.Checkerboard() = Even;
|
||||||
|
|
||||||
DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag);
|
DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag);
|
||||||
}
|
}
|
||||||
@ -323,7 +323,7 @@ void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &
|
|||||||
conformable(in._grid, _grid); // verifies full grid
|
conformable(in._grid, _grid); // verifies full grid
|
||||||
conformable(in._grid, out._grid);
|
conformable(in._grid, out._grid);
|
||||||
|
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
|
||||||
DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag);
|
DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag);
|
||||||
}
|
}
|
||||||
@ -333,8 +333,8 @@ void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField
|
|||||||
conformable(in._grid, _cbgrid); // verifies half grid
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
conformable(in._grid, out._grid); // drops the cb check
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard == Even);
|
assert(in.Checkerboard() == Even);
|
||||||
out.checkerboard = Odd;
|
out.Checkerboard() = Odd;
|
||||||
|
|
||||||
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag);
|
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag);
|
||||||
}
|
}
|
||||||
@ -344,8 +344,8 @@ void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField
|
|||||||
conformable(in._grid, _cbgrid); // verifies half grid
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
conformable(in._grid, out._grid); // drops the cb check
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard == Odd);
|
assert(in.Checkerboard() == Odd);
|
||||||
out.checkerboard = Even;
|
out.Checkerboard() = Even;
|
||||||
|
|
||||||
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag);
|
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag);
|
||||||
}
|
}
|
||||||
|
@ -262,8 +262,8 @@ void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionFie
|
|||||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
conformable(in._grid,out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard==Even);
|
assert(in.Checkerboard()==Even);
|
||||||
out.checkerboard = Odd;
|
out.Checkerboard() = Odd;
|
||||||
|
|
||||||
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag);
|
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag);
|
||||||
}
|
}
|
||||||
@ -274,8 +274,8 @@ void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionFie
|
|||||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
conformable(in._grid,out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard==Odd);
|
assert(in.Checkerboard()==Odd);
|
||||||
out.checkerboard = Even;
|
out.Checkerboard() = Even;
|
||||||
|
|
||||||
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag);
|
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag);
|
||||||
}
|
}
|
||||||
@ -286,7 +286,7 @@ void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField
|
|||||||
conformable(in._grid,FermionGrid()); // verifies full grid
|
conformable(in._grid,FermionGrid()); // verifies full grid
|
||||||
conformable(in._grid,out._grid);
|
conformable(in._grid,out._grid);
|
||||||
|
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
|
||||||
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
|
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
|
||||||
}
|
}
|
||||||
@ -349,21 +349,21 @@ void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField
|
|||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
|
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
return axpy_norm(out, mass, in, out);
|
return axpy_norm(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
RealD ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerYes);
|
Dhop(in, out, DaggerYes);
|
||||||
return axpy_norm(out, mass, in, out);
|
return axpy_norm(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||||
if (in.checkerboard == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerNo);
|
DhopEO(in, out, DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(in, out, DaggerNo);
|
DhopOE(in, out, DaggerNo);
|
||||||
@ -371,7 +371,7 @@ void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionFiel
|
|||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||||
if (in.checkerboard == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerYes);
|
DhopEO(in, out, DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(in, out, DaggerYes);
|
DhopOE(in, out, DaggerYes);
|
||||||
@ -380,27 +380,27 @@ void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionF
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
typename FermionField::scalar_type scal(mass);
|
typename FermionField::scalar_type scal(mass);
|
||||||
out = scal * in;
|
out = scal * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Mooee(in, out);
|
Mooee(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
out = (1.0 / (mass)) * in;
|
out = (1.0 / (mass)) * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
|
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
|
||||||
FermionField &out) {
|
FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
MooeeInv(in, out);
|
MooeeInv(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,8 +43,8 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &p
|
|||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
|
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -52,21 +52,21 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &p
|
|||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
for(int s=0; s<Ls; s++){
|
for(int s=0; s<Ls; s++){
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
if(s==0){
|
if(s==0){
|
||||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
spProj5m(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
spProj5p(tmp, psi[ss+Ls-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else if(s==(Ls-1)) {
|
} else if(s==(Ls-1)) {
|
||||||
spProj5m(tmp, psi._odata[ss+0]);
|
spProj5m(tmp, psi[ss+0]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
spProj5p(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
spProj5m(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
spProj5p(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -84,8 +84,8 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionFi
|
|||||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
|
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -93,25 +93,25 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionFi
|
|||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
for(int s=0; s<Ls; s++){
|
for(int s=0; s<Ls; s++){
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
if(s==0){
|
if(s==0){
|
||||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
spProj5m(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
spProj5p(tmp, psi[ss+Ls-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else if(s==(Ls-1)) {
|
} else if(s==(Ls-1)) {
|
||||||
spProj5m(tmp, psi._odata[ss+0]);
|
spProj5m(tmp, psi[ss+0]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
spProj5p(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
spProj5m(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
spProj5p(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
}
|
}
|
||||||
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
|
if(this->pm == 1){ spProj5p(tmp, psi[ss+shift_s]); }
|
||||||
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
|
else{ spProj5m(tmp, psi[ss+shift_s]); }
|
||||||
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
|
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -126,30 +126,30 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField
|
|||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
|
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
this->M5Dtime -= usecond();
|
this->M5Dtime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
for(int s=0; s<Ls; s++){
|
for(int s=0; s<Ls; s++){
|
||||||
if(s==0) {
|
if(s==0) {
|
||||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
spProj5p(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
spProj5m(tmp, psi[ss+Ls-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else if(s==(Ls-1)) {
|
} else if(s==(Ls-1)) {
|
||||||
spProj5p(tmp, psi._odata[ss+0]);
|
spProj5p(tmp, psi[ss+0]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
spProj5m(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
spProj5p(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
spProj5m(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -167,8 +167,8 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const Fermio
|
|||||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
|
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -176,26 +176,26 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const Fermio
|
|||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
chi[ss+Ls-1] = zero;
|
chi[ss+Ls-1] = zero;
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
for(int s=0; s<Ls; s++){
|
for(int s=0; s<Ls; s++){
|
||||||
if(s==0) {
|
if(s==0) {
|
||||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
spProj5p(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
spProj5m(tmp, psi[ss+Ls-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else if(s==(Ls-1)) {
|
} else if(s==(Ls-1)) {
|
||||||
spProj5p(tmp, psi._odata[ss+0]);
|
spProj5p(tmp, psi[ss+0]);
|
||||||
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
spProj5m(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
} else {
|
} else {
|
||||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
spProj5p(tmp, psi[ss+s+1]);
|
||||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
spProj5m(tmp, psi[ss+s-1]);
|
||||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||||
}
|
}
|
||||||
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
|
if(this->pm == 1){ spProj5p(tmp, psi[ss+s]); }
|
||||||
else{ spProj5m(tmp, psi._odata[ss+s]); }
|
else{ spProj5m(tmp, psi[ss+s]); }
|
||||||
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
|
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -211,14 +211,14 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &ch
|
|||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
|
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
|
|
||||||
// Apply (L^{\prime})^{-1}
|
// Apply (L^{\prime})^{-1}
|
||||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||||
@ -256,16 +256,16 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionFie
|
|||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
|
|
||||||
auto tmp1 = psi._odata[0];
|
auto tmp1 = psi[0];
|
||||||
auto tmp2 = psi._odata[0];
|
auto tmp2 = psi[0];
|
||||||
auto tmp2_spProj = psi._odata[0];
|
auto tmp2_spProj = psi[0];
|
||||||
|
|
||||||
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
|
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
|
||||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||||
@ -313,14 +313,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField
|
|||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
|
|
||||||
auto tmp = psi._odata[0];
|
auto tmp = psi[0];
|
||||||
|
|
||||||
// Apply (U^{\prime})^{-dag}
|
// Apply (U^{\prime})^{-dag}
|
||||||
chi[ss] = psi[ss];
|
chi[ss] = psi[ss];
|
||||||
@ -358,16 +358,16 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, Fermion
|
|||||||
GridBase *grid = psi._grid;
|
GridBase *grid = psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
|
|
||||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||||
|
|
||||||
auto tmp1 = psi._odata[0];
|
auto tmp1 = psi[0];
|
||||||
auto tmp2 = psi._odata[0];
|
auto tmp2 = psi[0];
|
||||||
auto tmp2_spProj = psi._odata[0];
|
auto tmp2_spProj = psi[0];
|
||||||
|
|
||||||
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
|
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
|
||||||
chi[ss] = psi[ss];
|
chi[ss] = psi[ss];
|
||||||
|
@ -76,7 +76,7 @@ void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionFiel
|
|||||||
RealD k = this->k;
|
RealD k = this->k;
|
||||||
RealD mq1 = this->mq1;
|
RealD mq1 = this->mq1;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
assert(Ls==LLs);
|
assert(Ls==LLs);
|
||||||
|
|
||||||
|
@ -131,7 +131,7 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& ch
|
|||||||
|
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
// Apply (L^{\prime})^{-1}
|
// Apply (L^{\prime})^{-1}
|
||||||
@ -162,7 +162,7 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionFie
|
|||||||
{
|
{
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
FermionField tmp(psi._grid);
|
FermionField tmp(psi._grid);
|
||||||
@ -203,7 +203,7 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField&
|
|||||||
|
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
// Apply (U^{\prime})^{-dagger}
|
// Apply (U^{\prime})^{-dagger}
|
||||||
@ -234,7 +234,7 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, Fermion
|
|||||||
{
|
{
|
||||||
Coeff_t one(1.0);
|
Coeff_t one(1.0);
|
||||||
Coeff_t czero(0.0);
|
Coeff_t czero(0.0);
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
FermionField tmp(psi._grid);
|
FermionField tmp(psi._grid);
|
||||||
|
@ -76,9 +76,9 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
|
|||||||
Vector<iSinglet<Simd>> d(LLs);
|
Vector<iSinglet<Simd>> d(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -237,9 +237,9 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
|
|||||||
Vector<iSinglet<Simd>> s(LLs);
|
Vector<iSinglet<Simd>> s(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -402,9 +402,9 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
|
|||||||
Vector<iSinglet<Simd>> d(LLs);
|
Vector<iSinglet<Simd>> d(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -560,9 +560,9 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
|
|||||||
Vector<iSinglet<Simd>> s(LLs);
|
Vector<iSinglet<Simd>> s(LLs);
|
||||||
|
|
||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.checkerboard == psi.checkerboard);
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -913,7 +913,7 @@ void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionFiel
|
|||||||
int LLs = psi._grid->_rdimensions[0];
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
int vol = psi._grid->oSites()/LLs;
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
chi.checkerboard = psi.checkerboard;
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
Vector<iSinglet<Simd>> Matp;
|
Vector<iSinglet<Simd>> Matp;
|
||||||
Vector<iSinglet<Simd>> Matm;
|
Vector<iSinglet<Simd>> Matm;
|
||||||
|
@ -51,7 +51,7 @@ template<class Impl>
|
|||||||
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
|
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
|
||||||
{
|
{
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.Checkerboard() == Odd ) {
|
||||||
this->DhopEO(psi,chi,DaggerNo);
|
this->DhopEO(psi,chi,DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(psi,chi,DaggerNo);
|
this->DhopOE(psi,chi,DaggerNo);
|
||||||
|
@ -51,12 +51,12 @@ public:
|
|||||||
template<typename vobj>
|
template<typename vobj>
|
||||||
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
|
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
|
||||||
GridBase *grid=out._grid;
|
GridBase *grid=out._grid;
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
|
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
|
||||||
int Ls = grid->_rdimensions[0];
|
int Ls = grid->_rdimensions[0];
|
||||||
parallel_for(int ss=0;ss<grid->oSites();ss++){
|
parallel_for(int ss=0;ss<grid->oSites();ss++){
|
||||||
vobj tmp = s[ss % Ls]*in._odata[ss];
|
vobj tmp = s[ss % Ls]*in[ss];
|
||||||
vstream(out._odata[ss],tmp);
|
vstream(out[ss],tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,14 +58,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp);
|
Impl::multLink(Uchi, U[sU], *chi_p, Xp);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Yp
|
// Yp
|
||||||
@ -74,14 +74,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Yp);
|
Impl::multLinkAdd(Uchi, U[sU], *chi_p, Yp);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zp
|
// Zp
|
||||||
@ -90,14 +90,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zp);
|
Impl::multLinkAdd(Uchi, U[sU], *chi_p, Zp);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tp
|
// Tp
|
||||||
@ -106,14 +106,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tp);
|
Impl::multLinkAdd(Uchi, U[sU], *chi_p, Tp);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xm
|
// Xm
|
||||||
@ -122,14 +122,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Xm);
|
Impl::multLinkAdd(Uchi, U[sU], *chi_p, Xm);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Ym
|
// Ym
|
||||||
@ -138,14 +138,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Ym);
|
Impl::multLinkAdd(Uchi, U[sU], *chi_p, Ym);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zm
|
// Zm
|
||||||
@ -154,14 +154,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zm);
|
Impl::multLinkAdd(Uchi, U[sU], *chi_p, Zm);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tm
|
// Tm
|
||||||
@ -170,14 +170,14 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
if (SE->_is_local) {
|
if (SE->_is_local) {
|
||||||
if (SE->_permute) {
|
if (SE->_permute) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
permute(chi, in._odata[SE->_offset], ptype);
|
permute(chi, in[SE->_offset], ptype);
|
||||||
} else {
|
} else {
|
||||||
chi_p = &in._odata[SE->_offset];
|
chi_p = &in[SE->_offset];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p = &buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tm);
|
Impl::multLinkAdd(Uchi, U[sU], *chi_p, Tm);
|
||||||
|
|
||||||
vstream(out, Uchi);
|
vstream(out, Uchi);
|
||||||
};
|
};
|
||||||
@ -198,7 +198,7 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
|
|||||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
||||||
for(int s=0;s<LLs;s++) {
|
for(int s=0;s<LLs;s++) {
|
||||||
int sF=s+LLs*sU;
|
int sF=s+LLs*sU;
|
||||||
out._odata[sF]=-out._odata[sF];
|
out[sF]=-out[sF];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
@ -210,7 +210,7 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
|
|||||||
int sF=s+LLs*sU;
|
int sF=s+LLs*sU;
|
||||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||||
out._odata[sF] =-naive-naik;
|
out[sF] =-naive-naik;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -247,7 +247,7 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
|
|||||||
// assert(sF>=0); assert(sU>=0);
|
// assert(sF>=0); assert(sU>=0);
|
||||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||||
out._odata[sF] =naive+naik;
|
out[sF] =naive+naik;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -589,7 +589,7 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in._odata[o] ; } else { out =(uint64_t) &buf[o]; }
|
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in[o] ; } else { out =(uint64_t) &buf[o]; }
|
||||||
|
|
||||||
#define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? in_p : buf; out = (uint64_t) &ptr[o]; }
|
#define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? in_p : buf; out = (uint64_t) &ptr[o]; }
|
||||||
|
|
||||||
@ -636,10 +636,10 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
CONDITIONAL_MOVE(l3,o3,addr3); \
|
CONDITIONAL_MOVE(l3,o3,addr3); \
|
||||||
PF_CHI(addr3); \
|
PF_CHI(addr3); \
|
||||||
\
|
\
|
||||||
gauge0 =(uint64_t)&UU._odata[sU]( X ); \
|
gauge0 =(uint64_t)&UU[sU]( X ); \
|
||||||
gauge1 =(uint64_t)&UU._odata[sU]( Y ); \
|
gauge1 =(uint64_t)&UU[sU]( Y ); \
|
||||||
gauge2 =(uint64_t)&UU._odata[sU]( Z ); \
|
gauge2 =(uint64_t)&UU[sU]( Z ); \
|
||||||
gauge3 =(uint64_t)&UU._odata[sU]( T );
|
gauge3 =(uint64_t)&UU[sU]( T );
|
||||||
|
|
||||||
// This is the single precision 5th direction vectorised kernel
|
// This is the single precision 5th direction vectorised kernel
|
||||||
#include <simd/Intel512single.h>
|
#include <simd/Intel512single.h>
|
||||||
@ -652,7 +652,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
|||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
uint64_t addr0,addr1,addr2,addr3;
|
uint64_t addr0,addr1,addr2,addr3;
|
||||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
const SiteSpinor *in_p; in_p = &in[0];
|
||||||
|
|
||||||
int o0,o1,o2,o3; // offsets
|
int o0,o1,o2,o3; // offsets
|
||||||
int l0,l1,l2,l3; // local
|
int l0,l1,l2,l3; // local
|
||||||
@ -683,7 +683,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
|||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||||
|
|
||||||
addr0 = (uint64_t) &out._odata[sF];
|
addr0 = (uint64_t) &out[sF];
|
||||||
REDUCE(addr0);
|
REDUCE(addr0);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -702,7 +702,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
|||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
uint64_t addr0,addr1,addr2,addr3;
|
uint64_t addr0,addr1,addr2,addr3;
|
||||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
const SiteSpinor *in_p; in_p = &in[0];
|
||||||
|
|
||||||
int o0,o1,o2,o3; // offsets
|
int o0,o1,o2,o3; // offsets
|
||||||
int l0,l1,l2,l3; // local
|
int l0,l1,l2,l3; // local
|
||||||
@ -732,7 +732,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
|||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||||
|
|
||||||
addr0 = (uint64_t) &out._odata[sF];
|
addr0 = (uint64_t) &out[sF];
|
||||||
REDUCE(addr0);
|
REDUCE(addr0);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -783,7 +783,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
|||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
uint64_t addr0,addr1,addr2,addr3;
|
uint64_t addr0,addr1,addr2,addr3;
|
||||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
const SiteSpinor *in_p; in_p = &in[0];
|
||||||
|
|
||||||
int o0,o1,o2,o3; // offsets
|
int o0,o1,o2,o3; // offsets
|
||||||
int l0,l1,l2,l3; // local
|
int l0,l1,l2,l3; // local
|
||||||
@ -830,7 +830,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
|||||||
PERMUTE23;
|
PERMUTE23;
|
||||||
MULT_ADD_XYZT(gauge2,gauge3);
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
addr0 = (uint64_t) &out._odata[sF];
|
addr0 = (uint64_t) &out[sF];
|
||||||
REDUCEa(addr0);
|
REDUCEa(addr0);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -848,7 +848,7 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
|||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
uint64_t addr0,addr1,addr2,addr3;
|
uint64_t addr0,addr1,addr2,addr3;
|
||||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
const SiteSpinor *in_p; in_p = &in[0];
|
||||||
|
|
||||||
int o0,o1,o2,o3; // offsets
|
int o0,o1,o2,o3; // offsets
|
||||||
int l0,l1,l2,l3; // local
|
int l0,l1,l2,l3; // local
|
||||||
@ -895,7 +895,7 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
|||||||
PERMUTE23;
|
PERMUTE23;
|
||||||
MULT_ADD_XYZT(gauge2,gauge3);
|
MULT_ADD_XYZT(gauge2,gauge3);
|
||||||
|
|
||||||
addr0 = (uint64_t) &out._odata[sF];
|
addr0 = (uint64_t) &out[sF];
|
||||||
REDUCEa(addr0);
|
REDUCEa(addr0);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -39,7 +39,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
// To splat or not to splat depends on the implementation
|
// To splat or not to splat depends on the implementation
|
||||||
#define MULT(A,UChi) \
|
#define MULT(A,UChi) \
|
||||||
auto & ref(U._odata[sU](A)); \
|
auto & ref(U[sU](A)); \
|
||||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||||
@ -60,7 +60,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
UChi ## _2 += U_22*Chi_2;
|
UChi ## _2 += U_22*Chi_2;
|
||||||
|
|
||||||
#define MULT_ADD(A,UChi) \
|
#define MULT_ADD(A,UChi) \
|
||||||
auto & ref(U._odata[sU](A)); \
|
auto & ref(U[sU](A)); \
|
||||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||||
@ -105,7 +105,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, Do
|
|||||||
int sF=s+LLs*sU;
|
int sF=s+LLs*sU;
|
||||||
DhopSiteDepthHand(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
DhopSiteDepthHand(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||||
DhopSiteDepthHand(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
DhopSiteDepthHand(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||||
out._odata[sF] =scale*(naive+naik);
|
out[sF] =scale*(naive+naik);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -151,7 +151,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
@ -169,7 +169,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
@ -188,7 +188,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
@ -206,7 +206,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
@ -224,7 +224,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
@ -243,7 +243,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
@ -261,7 +261,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
@ -279,7 +279,7 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
perm = SE->_permute;
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHI(in._odata);
|
LOAD_CHI((&in[0]));
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
|
@ -79,21 +79,21 @@ void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
return axpy_norm(out, 4 + mass, in, out);
|
return axpy_norm(out, 4 + mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerYes);
|
Dhop(in, out, DaggerYes);
|
||||||
return axpy_norm(out, 4 + mass, in, out);
|
return axpy_norm(out, 4 + mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||||
if (in.checkerboard == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerNo);
|
DhopEO(in, out, DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(in, out, DaggerNo);
|
DhopOE(in, out, DaggerNo);
|
||||||
@ -102,7 +102,7 @@ void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||||
if (in.checkerboard == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerYes);
|
DhopEO(in, out, DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(in, out, DaggerYes);
|
DhopOE(in, out, DaggerYes);
|
||||||
@ -111,26 +111,26 @@ void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
typename FermionField::scalar_type scal(4.0 + mass);
|
typename FermionField::scalar_type scal(4.0 + mass);
|
||||||
out = scal * in;
|
out = scal * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Mooee(in, out);
|
Mooee(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
out = (1.0/(4.0+mass))*in;
|
out = (1.0/(4.0+mass))*in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
MooeeInv(in,out);
|
MooeeInv(in,out);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -233,7 +233,7 @@ void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, cons
|
|||||||
conformable(U._grid, V._grid);
|
conformable(U._grid, V._grid);
|
||||||
conformable(U._grid, mat._grid);
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
mat.checkerboard = U.checkerboard;
|
mat.Checkerboard() = U.Checkerboard();
|
||||||
|
|
||||||
DerivInternal(Stencil, Umu, mat, U, V, dag);
|
DerivInternal(Stencil, Umu, mat, U, V, dag);
|
||||||
}
|
}
|
||||||
@ -245,9 +245,9 @@ void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, co
|
|||||||
//conformable(U._grid, mat._grid); not general, leaving as a comment (Guido)
|
//conformable(U._grid, mat._grid); not general, leaving as a comment (Guido)
|
||||||
// Motivation: look at the SchurDiff operator
|
// Motivation: look at the SchurDiff operator
|
||||||
|
|
||||||
assert(V.checkerboard == Even);
|
assert(V.Checkerboard() == Even);
|
||||||
assert(U.checkerboard == Odd);
|
assert(U.Checkerboard() == Odd);
|
||||||
mat.checkerboard = Odd;
|
mat.Checkerboard() = Odd;
|
||||||
|
|
||||||
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
|
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
|
||||||
}
|
}
|
||||||
@ -258,9 +258,9 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
|
|||||||
conformable(U._grid, V._grid);
|
conformable(U._grid, V._grid);
|
||||||
//conformable(U._grid, mat._grid);
|
//conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
assert(V.checkerboard == Odd);
|
assert(V.Checkerboard() == Odd);
|
||||||
assert(U.checkerboard == Even);
|
assert(U.Checkerboard() == Even);
|
||||||
mat.checkerboard = Even;
|
mat.Checkerboard() = Even;
|
||||||
|
|
||||||
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
|
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
|
||||||
}
|
}
|
||||||
@ -270,7 +270,7 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
|
|||||||
conformable(in._grid, _grid); // verifies full grid
|
conformable(in._grid, _grid); // verifies full grid
|
||||||
conformable(in._grid, out._grid);
|
conformable(in._grid, out._grid);
|
||||||
|
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
|
||||||
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
|
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
|
||||||
}
|
}
|
||||||
@ -280,8 +280,8 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
|
|||||||
conformable(in._grid, _cbgrid); // verifies half grid
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
conformable(in._grid, out._grid); // drops the cb check
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard == Even);
|
assert(in.Checkerboard() == Even);
|
||||||
out.checkerboard = Odd;
|
out.Checkerboard() = Odd;
|
||||||
|
|
||||||
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
|
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
|
||||||
}
|
}
|
||||||
@ -291,8 +291,8 @@ void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int d
|
|||||||
conformable(in._grid, _cbgrid); // verifies half grid
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
conformable(in._grid, out._grid); // drops the cb check
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard == Odd);
|
assert(in.Checkerboard() == Odd);
|
||||||
out.checkerboard = Even;
|
out.Checkerboard() = Even;
|
||||||
|
|
||||||
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
|
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
|
||||||
}
|
}
|
||||||
@ -368,13 +368,13 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
|||||||
tmp2 = Cshift(q_in_2, mu, 1);
|
tmp2 = Cshift(q_in_2, mu, 1);
|
||||||
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
|
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
|
||||||
{
|
{
|
||||||
Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sU],
|
Kernels::ContractConservedCurrentSiteFwd(tmp1[sU],
|
||||||
q_in_2._odata[sU],
|
q_in_2[sU],
|
||||||
q_out._odata[sU],
|
q_out[sU],
|
||||||
Umu, sU, mu);
|
Umu, sU, mu);
|
||||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sU],
|
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sU],
|
||||||
tmp2._odata[sU],
|
tmp2[sU],
|
||||||
q_out._odata[sU],
|
q_out[sU],
|
||||||
Umu, sU, mu);
|
Umu, sU, mu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -419,31 +419,31 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
{
|
{
|
||||||
// Compute the sequential conserved current insertion only if our simd
|
// Compute the sequential conserved current insertion only if our simd
|
||||||
// object contains a timeslice we need.
|
// object contains a timeslice we need.
|
||||||
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
|
vInteger t_mask = ((coords[sU] >= tmin) &&
|
||||||
(coords._odata[sU] <= tmax));
|
(coords[sU] <= tmax));
|
||||||
Integer timeSlices = Reduce(t_mask);
|
Integer timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
if (timeSlices > 0)
|
if (timeSlices > 0)
|
||||||
{
|
{
|
||||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sU],
|
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sU],
|
||||||
q_out._odata[sU],
|
q_out[sU],
|
||||||
Umu, sU, mu, t_mask);
|
Umu, sU, mu, t_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Repeat for backward direction.
|
// Repeat for backward direction.
|
||||||
t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
|
t_mask = ((coords[sU] >= (tmin + tshift)) &&
|
||||||
(coords._odata[sU] <= (tmax + tshift)));
|
(coords[sU] <= (tmax + tshift)));
|
||||||
|
|
||||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||||
unsigned int t0 = 0;
|
unsigned int t0 = 0;
|
||||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 ));
|
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
|
||||||
|
|
||||||
timeSlices = Reduce(t_mask);
|
timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
if (timeSlices > 0)
|
if (timeSlices > 0)
|
||||||
{
|
{
|
||||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sU],
|
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sU],
|
||||||
q_out._odata[sU],
|
q_out[sU],
|
||||||
Umu, sU, mu, t_mask);
|
Umu, sU, mu, t_mask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -328,7 +328,7 @@ void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
|
|||||||
|
|
||||||
//conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment
|
//conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment
|
||||||
|
|
||||||
mat.checkerboard = A.checkerboard;
|
mat.Checkerboard() = A.Checkerboard();
|
||||||
|
|
||||||
DerivInternal(Stencil,Umu,mat,A,B,dag);
|
DerivInternal(Stencil,Umu,mat,A,B,dag);
|
||||||
}
|
}
|
||||||
@ -342,9 +342,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
|||||||
conformable(A._grid,FermionRedBlackGrid());
|
conformable(A._grid,FermionRedBlackGrid());
|
||||||
conformable(A._grid,B._grid);
|
conformable(A._grid,B._grid);
|
||||||
|
|
||||||
assert(B.checkerboard==Odd);
|
assert(B.Checkerboard()==Odd);
|
||||||
assert(A.checkerboard==Even);
|
assert(A.Checkerboard()==Even);
|
||||||
mat.checkerboard = Even;
|
mat.Checkerboard() = Even;
|
||||||
|
|
||||||
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
|
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
|
||||||
}
|
}
|
||||||
@ -359,9 +359,9 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
|||||||
conformable(A._grid,FermionRedBlackGrid());
|
conformable(A._grid,FermionRedBlackGrid());
|
||||||
conformable(A._grid,B._grid);
|
conformable(A._grid,B._grid);
|
||||||
|
|
||||||
assert(B.checkerboard==Even);
|
assert(B.Checkerboard()==Even);
|
||||||
assert(A.checkerboard==Odd);
|
assert(A.Checkerboard()==Odd);
|
||||||
mat.checkerboard = Odd;
|
mat.Checkerboard() = Odd;
|
||||||
|
|
||||||
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
|
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
|
||||||
}
|
}
|
||||||
@ -525,8 +525,8 @@ void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int
|
|||||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
conformable(in._grid,out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard==Even);
|
assert(in.Checkerboard()==Even);
|
||||||
out.checkerboard = Odd;
|
out.Checkerboard() = Odd;
|
||||||
|
|
||||||
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
|
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
|
||||||
}
|
}
|
||||||
@ -537,8 +537,8 @@ void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int
|
|||||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
conformable(in._grid,out._grid); // drops the cb check
|
||||||
|
|
||||||
assert(in.checkerboard==Odd);
|
assert(in.Checkerboard()==Odd);
|
||||||
out.checkerboard = Even;
|
out.Checkerboard() = Even;
|
||||||
|
|
||||||
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
|
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
|
||||||
}
|
}
|
||||||
@ -549,14 +549,14 @@ void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int d
|
|||||||
conformable(in._grid,FermionGrid()); // verifies full grid
|
conformable(in._grid,FermionGrid()); // verifies full grid
|
||||||
conformable(in._grid,out._grid);
|
conformable(in._grid,out._grid);
|
||||||
|
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
|
||||||
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
|
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
out.checkerboard=in.checkerboard;
|
out.Checkerboard()=in.Checkerboard();
|
||||||
Dhop(in,out,dag); // -0.5 is included
|
Dhop(in,out,dag); // -0.5 is included
|
||||||
axpy(out,4.0-M5,in,out);
|
axpy(out,4.0-M5,in,out);
|
||||||
}
|
}
|
||||||
@ -754,21 +754,21 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
|||||||
// sites correctly.
|
// sites correctly.
|
||||||
if (Impl::LsVectorised)
|
if (Impl::LsVectorised)
|
||||||
{
|
{
|
||||||
REVERSE_LS(q_in_2._odata[sF2], qSite2, Ls / LLs);
|
REVERSE_LS(q_in_2[sF2], qSite2, Ls / LLs);
|
||||||
REVERSE_LS(tmp2._odata[sF2], qmuSite2, Ls / LLs);
|
REVERSE_LS(tmp2[sF2], qmuSite2, Ls / LLs);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
qSite2 = q_in_2._odata[sF2];
|
qSite2 = q_in_2[sF2];
|
||||||
qmuSite2 = tmp2._odata[sF2];
|
qmuSite2 = tmp2[sF2];
|
||||||
}
|
}
|
||||||
Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sF1],
|
Kernels::ContractConservedCurrentSiteFwd(tmp1[sF1],
|
||||||
qSite2,
|
qSite2,
|
||||||
q_out._odata[sU],
|
q_out[sU],
|
||||||
Umu, sU, mu, axial_sign);
|
Umu, sU, mu, axial_sign);
|
||||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sF1],
|
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sF1],
|
||||||
qmuSite2,
|
qmuSite2,
|
||||||
q_out._odata[sU],
|
q_out[sU],
|
||||||
Umu, sU, mu, axial_sign);
|
Umu, sU, mu, axial_sign);
|
||||||
sF1++;
|
sF1++;
|
||||||
sF2--;
|
sF2--;
|
||||||
@ -821,8 +821,8 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
{
|
{
|
||||||
// Compute the sequential conserved current insertion only if our simd
|
// Compute the sequential conserved current insertion only if our simd
|
||||||
// object contains a timeslice we need.
|
// object contains a timeslice we need.
|
||||||
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
|
vInteger t_mask = ((coords[sU] >= tmin) &&
|
||||||
(coords._odata[sU] <= tmax));
|
(coords[sU] <= tmax));
|
||||||
Integer timeSlices = Reduce(t_mask);
|
Integer timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
if (timeSlices > 0)
|
if (timeSlices > 0)
|
||||||
@ -831,20 +831,20 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
for (unsigned int s = 0; s < LLs; ++s)
|
for (unsigned int s = 0; s < LLs; ++s)
|
||||||
{
|
{
|
||||||
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
||||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sF],
|
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sF],
|
||||||
q_out._odata[sF], Umu, sU,
|
q_out[sF], Umu, sU,
|
||||||
mu, t_mask, axial_sign);
|
mu, t_mask, axial_sign);
|
||||||
++sF;
|
++sF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Repeat for backward direction.
|
// Repeat for backward direction.
|
||||||
t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
|
t_mask = ((coords[sU] >= (tmin + tshift)) &&
|
||||||
(coords._odata[sU] <= (tmax + tshift)));
|
(coords[sU] <= (tmax + tshift)));
|
||||||
|
|
||||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||||
unsigned int t0 = 0;
|
unsigned int t0 = 0;
|
||||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 ));
|
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
|
||||||
|
|
||||||
timeSlices = Reduce(t_mask);
|
timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
@ -854,8 +854,8 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
for (unsigned int s = 0; s < LLs; ++s)
|
for (unsigned int s = 0; s < LLs; ++s)
|
||||||
{
|
{
|
||||||
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
||||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sF],
|
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sF],
|
||||||
q_out._odata[sF], Umu, sU,
|
q_out[sF], Umu, sU,
|
||||||
mu, t_mask, axial_sign);
|
mu, t_mask, axial_sign);
|
||||||
++sF;
|
++sF;
|
||||||
}
|
}
|
||||||
|
@ -47,15 +47,15 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
|||||||
if (SE->_is_local) { \
|
if (SE->_is_local) { \
|
||||||
chi_p = &chi; \
|
chi_p = &chi; \
|
||||||
if (SE->_permute) { \
|
if (SE->_permute) { \
|
||||||
spProj(tmp, in._odata[SE->_offset]); \
|
spProj(tmp, in[SE->_offset]); \
|
||||||
permute(chi, tmp, ptype); \
|
permute(chi, tmp, ptype); \
|
||||||
} else { \
|
} else { \
|
||||||
spProj(chi, in._odata[SE->_offset]); \
|
spProj(chi, in[SE->_offset]); \
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi_p = &buf[SE->_offset]; \
|
||||||
} \
|
} \
|
||||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], *chi_p, Dir, SE, st); \
|
||||||
Recon(result, Uchi);
|
Recon(result, Uchi);
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
|
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
|
||||||
@ -63,16 +63,16 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
|||||||
if (SE->_is_local) { \
|
if (SE->_is_local) { \
|
||||||
chi_p = &chi; \
|
chi_p = &chi; \
|
||||||
if (SE->_permute) { \
|
if (SE->_permute) { \
|
||||||
spProj(tmp, in._odata[SE->_offset]); \
|
spProj(tmp, in[SE->_offset]); \
|
||||||
permute(chi, tmp, ptype); \
|
permute(chi, tmp, ptype); \
|
||||||
} else { \
|
} else { \
|
||||||
spProj(chi, in._odata[SE->_offset]); \
|
spProj(chi, in[SE->_offset]); \
|
||||||
} \
|
} \
|
||||||
} else if ( st.same_node[Dir] ) { \
|
} else if ( st.same_node[Dir] ) { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi_p = &buf[SE->_offset]; \
|
||||||
} \
|
} \
|
||||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], *chi_p, Dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi); \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
|||||||
SE = st.GetEntry(ptype, Dir, sF); \
|
SE = st.GetEntry(ptype, Dir, sF); \
|
||||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi_p = &buf[SE->_offset]; \
|
||||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], *chi_p, Dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi); \
|
||||||
nmu++; \
|
nmu++; \
|
||||||
}
|
}
|
||||||
@ -88,14 +88,14 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
|||||||
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
||||||
if (gamma == Dir) { \
|
if (gamma == Dir) { \
|
||||||
if (SE->_is_local && SE->_permute) { \
|
if (SE->_is_local && SE->_permute) { \
|
||||||
spProj(tmp, in._odata[SE->_offset]); \
|
spProj(tmp, in[SE->_offset]); \
|
||||||
permute(chi, tmp, ptype); \
|
permute(chi, tmp, ptype); \
|
||||||
} else if (SE->_is_local) { \
|
} else if (SE->_is_local) { \
|
||||||
spProj(chi, in._odata[SE->_offset]); \
|
spProj(chi, in[SE->_offset]); \
|
||||||
} else { \
|
} else { \
|
||||||
chi = buf[SE->_offset]; \
|
chi = buf[SE->_offset]; \
|
||||||
} \
|
} \
|
||||||
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi); \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -123,7 +123,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
|
||||||
vstream(out._odata[sF], result);
|
vstream(out[sF], result);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -147,7 +147,7 @@ void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, Do
|
|||||||
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
|
||||||
vstream(out._odata[sF], result);
|
vstream(out[sF], result);
|
||||||
};
|
};
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// Interior kernels
|
// Interior kernels
|
||||||
@ -174,7 +174,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &
|
|||||||
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
|
||||||
vstream(out._odata[sF], result);
|
vstream(out[sF], result);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -198,7 +198,7 @@ void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
|
||||||
vstream(out._odata[sF], result);
|
vstream(out[sF], result);
|
||||||
};
|
};
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// Exterior kernels
|
// Exterior kernels
|
||||||
@ -226,7 +226,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &
|
|||||||
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
out._odata[sF] = out._odata[sF] + result;
|
out[sF] = out[sF] + result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -253,7 +253,7 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
out._odata[sF] = out._odata[sF] + result;
|
out[sF] = out[sF] + result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -277,7 +277,7 @@ void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHa
|
|||||||
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
|
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
|
||||||
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
|
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
|
||||||
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
|
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
|
||||||
vstream(out._odata[sF], result);
|
vstream(out[sF], result);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
@ -307,7 +307,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
|
|||||||
{
|
{
|
||||||
SitePropagator result, tmp;
|
SitePropagator result, tmp;
|
||||||
Gamma g5(Gamma::Algebra::Gamma5);
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu);
|
Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
|
||||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
|
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
|
||||||
if (switch_sign)
|
if (switch_sign)
|
||||||
{
|
{
|
||||||
@ -337,7 +337,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
|
|||||||
{
|
{
|
||||||
SitePropagator result, tmp;
|
SitePropagator result, tmp;
|
||||||
Gamma g5(Gamma::Algebra::Gamma5);
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu + Nd);
|
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
|
||||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
|
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
|
||||||
if (switch_sign)
|
if (switch_sign)
|
||||||
{
|
{
|
||||||
@ -398,7 +398,7 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
|||||||
bool switch_sign)
|
bool switch_sign)
|
||||||
{
|
{
|
||||||
SitePropagator result;
|
SitePropagator result;
|
||||||
Impl::multLinkProp(result, U._odata[sU], q_in, mu);
|
Impl::multLinkProp(result, U[sU], q_in, mu);
|
||||||
result = WilsonCurrentFwd(result, mu);
|
result = WilsonCurrentFwd(result, mu);
|
||||||
|
|
||||||
// Zero any unwanted timeslice entries.
|
// Zero any unwanted timeslice entries.
|
||||||
@ -430,7 +430,7 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
|||||||
bool switch_sign)
|
bool switch_sign)
|
||||||
{
|
{
|
||||||
SitePropagator result;
|
SitePropagator result;
|
||||||
Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd);
|
Impl::multLinkProp(result, U[sU], q_in, mu + Nd);
|
||||||
result = WilsonCurrentBwd(result, mu);
|
result = WilsonCurrentBwd(result, mu);
|
||||||
|
|
||||||
// Zero any unwanted timeslice entries.
|
// Zero any unwanted timeslice entries.
|
||||||
|
@ -130,7 +130,7 @@
|
|||||||
int local,perm, ptype;
|
int local,perm, ptype;
|
||||||
uint64_t base;
|
uint64_t base;
|
||||||
uint64_t basep;
|
uint64_t basep;
|
||||||
const uint64_t plocal =(uint64_t) & in._odata[0];
|
const uint64_t plocal =(uint64_t) & in[0];
|
||||||
|
|
||||||
COMPLEX_SIGNS(isigns);
|
COMPLEX_SIGNS(isigns);
|
||||||
MASK_REGS;
|
MASK_REGS;
|
||||||
@ -166,7 +166,7 @@
|
|||||||
if (nmu==0) break;
|
if (nmu==0) break;
|
||||||
// if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
|
// if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
base = (uint64_t) &out._odata[ss];
|
base = (uint64_t) &out[ss];
|
||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
RESULT(base,basep);
|
RESULT(base,basep);
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
Chimu_32=ref(F)(3)(2)
|
Chimu_32=ref(F)(3)(2)
|
||||||
|
|
||||||
#define LOAD_CHIMU(DIR,F,PERM) \
|
#define LOAD_CHIMU(DIR,F,PERM) \
|
||||||
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
|
{ const SiteSpinor & ref (in[offset]); LOAD_CHIMU_BODY(F); }
|
||||||
|
|
||||||
#define LOAD_CHI_BODY(F) \
|
#define LOAD_CHI_BODY(F) \
|
||||||
Chi_00 = ref(F)(0)(0);\
|
Chi_00 = ref(F)(0)(0);\
|
||||||
@ -103,7 +103,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
||||||
{ const SiteSpinor &ref(in._odata[offset]); \
|
{ const SiteSpinor &ref(in[offset]); \
|
||||||
LOAD_CHI_SETUP(DIR,F); \
|
LOAD_CHI_SETUP(DIR,F); \
|
||||||
if(!inplace_twist){ \
|
if(!inplace_twist){ \
|
||||||
LOAD_CHIMU_BODY(g); \
|
LOAD_CHIMU_BODY(g); \
|
||||||
@ -201,10 +201,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
|
|
||||||
#define MULT_2SPIN(A,F) \
|
#define MULT_2SPIN(A,F) \
|
||||||
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
|
{auto & ref(U[sU](A)); MULT_2SPIN_BODY; }
|
||||||
|
|
||||||
#define MULT_2SPIN_GPARITY(A,F) \
|
#define MULT_2SPIN_GPARITY(A,F) \
|
||||||
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
|
{auto & ref(U[sU](F)(A)); MULT_2SPIN_BODY; }
|
||||||
|
|
||||||
|
|
||||||
#define PERMUTE_DIR(dir) \
|
#define PERMUTE_DIR(dir) \
|
||||||
@ -478,7 +478,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#define HAND_RESULT(ss,F) \
|
#define HAND_RESULT(ss,F) \
|
||||||
{ \
|
{ \
|
||||||
SiteSpinor & ref (out._odata[ss]); \
|
SiteSpinor & ref (out[ss]); \
|
||||||
vstream(ref(F)(0)(0),result_00); \
|
vstream(ref(F)(0)(0),result_00); \
|
||||||
vstream(ref(F)(0)(1),result_01); \
|
vstream(ref(F)(0)(1),result_01); \
|
||||||
vstream(ref(F)(0)(2),result_02); \
|
vstream(ref(F)(0)(2),result_02); \
|
||||||
@ -495,7 +495,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#define HAND_RESULT_EXT(ss,F) \
|
#define HAND_RESULT_EXT(ss,F) \
|
||||||
if (nmu){ \
|
if (nmu){ \
|
||||||
SiteSpinor & ref (out._odata[ss]); \
|
SiteSpinor & ref (out[ss]); \
|
||||||
ref(F)(0)(0)+=result_00; \
|
ref(F)(0)(0)+=result_00; \
|
||||||
ref(F)(0)(1)+=result_01; \
|
ref(F)(0)(1)+=result_01; \
|
||||||
ref(F)(0)(2)+=result_02; \
|
ref(F)(0)(2)+=result_02; \
|
||||||
|
@ -61,14 +61,14 @@ template<class Impl>
|
|||||||
void WilsonTMFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void WilsonTMFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
RealD a = 4.0+this->mass;
|
RealD a = 4.0+this->mass;
|
||||||
RealD b = this->mu;
|
RealD b = this->mu;
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
axpibg5x(out,in,a,b);
|
axpibg5x(out,in,a,b);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonTMFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void WilsonTMFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||||
RealD a = 4.0+this->mass;
|
RealD a = 4.0+this->mass;
|
||||||
RealD b = -this->mu;
|
RealD b = -this->mu;
|
||||||
out.checkerboard = in.checkerboard;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
axpibg5x(out,in,a,b);
|
axpibg5x(out,in,a,b);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
|
Loading…
Reference in New Issue
Block a user