1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 03:05:55 +01:00

Rewrite for performance. Impl dependent instantiations give

4d linalg impls of the 5d hopping terms (and inverse)
Cache friendly loop orderings of the above
Dense matrix stored and apply to the above

-- Switch to Ls vectorised, and use dense matrix approach for the MooeeInv
   and rotate/shift of the Mooee M5D routines.
This commit is contained in:
paboyle 2016-07-14 23:58:15 +01:00
parent fb45eb2eb2
commit 79a8ca1a62

View File

@ -28,7 +28,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Eigen/Dense>
#include <Grid.h>
@ -49,48 +48,107 @@ namespace QCD {
FourDimGrid,
FourDimRedBlackGrid,_M5,p),
mass(_mass)
{
}
{ }
template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag (Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<RealD> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
// Assemble Din
int Ls=this->Ls;
std::vector<RealD> diag = bs;
std::vector<RealD> upper= cs;
std::vector<RealD> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper);
}
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = beo;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i];
lower[i]=-ceo[i];
}
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-cee[i];
lower[i]=-cee[i];
}
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
if ( s==0 ) {
// Din = bs psi[s] + cs[s] psi[s+1}
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
// Din+= -mass*cs[s] psi[s+1}
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
upper[s] = -cee[s+1] ;
lower[s] = mass*cee[Ls-1];
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
upper[s] = mass*cee[0];
lower[s] = -cee[s-1];
} else {
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
upper[s]=-cee[s+1];
lower[s]=-cee[s-1];
}
}
M5Ddag(psi,psi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag(Ls,1.0);
std::vector<RealD> upper(Ls,-1.0);
std::vector<RealD> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
} else {
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
}
}
std::vector<RealD> diag =bs;
std::vector<RealD> upper=cs;
std::vector<RealD> lower=cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,psi,Din,lower,diag,upper);
}
// override multiply
template<class Impl>
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
@ -105,20 +163,8 @@ namespace QCD {
// ((b D_W + D_w hop terms +1) on s-diag
axpby(chi,1.0,1.0,chi,psi);
// Call Mooee??
for(int s=0;s<Ls;s++){
if ( s==0 ){
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0);
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
} else {
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
}
}
return norm2(chi);
M5D(psi,chi);
return(norm2(chi));
}
template<class Impl>
@ -134,27 +180,7 @@ namespace QCD {
MeooeDag5D(Din,chi);
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
// Collect the terms in DW
// Chi = bs Din[s] + cs[s] Din[s+1}
// Chi+= -mass*cs[s] psi[s+1}
// FIXME just call MooeeDag??
// Collect the terms indept of DW
if ( s==0 ){
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0);
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
} else {
axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
}
}
M5Ddag(psi,chi);
// ((b D_W + D_w hop terms +1) on s-diag
axpby (chi,1.0,1.0,chi,psi);
return norm2(chi);
@ -165,12 +191,10 @@ namespace QCD {
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField tmp(psi._grid);
// Assemble the 5d matrix
Meooe5D(psi,tmp);
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
this->DhopEO(tmp,chi,DaggerNo);
} else {
@ -188,256 +212,16 @@ namespace QCD {
} else {
this->DhopOE(psi,tmp,DaggerYes);
}
MeooeDag5D(tmp,chi);
}
template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
for (int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,mass*cee[s],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(chi,bee[s],psi,mass*cee[s],psi,s,0);
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
} else {
axpby_ssp_pminus(chi,bee[s],psi,-cee[s],psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
}
}
FermionField temp(psi._grid);
this->MooeeDenseInternal(psi,temp,DaggerNo,InverseNo);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
}
template<class Impl>
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls=this->Ls;
FermionField tmp(psi._grid);
// Assemble the 5d matrix
for(int s=0;s<Ls;s++){
if ( s==0 ) {
// tmp = bs psi[s] + cs[s] psi[s+1}
// tmp+= -mass*cs[s] psi[s+1}
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
} else {
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
}
}
Meo5D(psi,tmp);
// Apply 4d dslash fragment
this->DhopDir(tmp,chi,dir,disp);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
if ( s==0 ) {
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1] ,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,mass*cee[Ls-1],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus(chi,bee[s],psi,mass*cee[0],psi,s,0);
axpby_ssp_pminus(chi,1.0,chi,-cee[s-1],psi,s,s-1);
} else {
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1],psi,s,s+1);
axpby_ssp_pminus(chi,1.0 ,chi,-cee[s-1],psi,s,s-1);
}
}
FermionField temp(psi._grid);
this->MooeeDenseInternal(psi,temp,DaggerYes,InverseNo);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
FermionField temp(psi._grid);
this->MooeeLDUInv(psi,chi);
this->MooeeDenseInv(psi,temp);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
FermionField temp(psi._grid);
this->MooeeLDUInvDag(psi,chi);
this->MooeeDenseInvDag(psi,temp);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDenseInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeDenseInternal(psi,chi,DaggerYes,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDenseInv(const FermionField &psi, FermionField &chi)
{
this->MooeeDenseInternal(psi,chi,DaggerNo,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDenseInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
int Ls=this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard=psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = bee[s];
Pminus(s,s)= bee[s];
}
for(int s=0;s<Ls-1;s++){
Pminus(s,s+1) = -cee[s];
}
for(int s=0;s<Ls-1;s++){
Pplus(s+1,s) = -cee[s];
}
std::cout << " Pplus "<<Pplus<<std::endl;
std::cout << " Pminus "<<Pminus<<std::endl;
Pplus (0,Ls-1) = mass*cee[0];
Pminus(Ls-1,0) = mass*cee[Ls-1];
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if ( inv ) {
PplusMat =Pplus.inverse();
PminusMat=Pminus.inverse();
} else {
PplusMat =Pplus;
PminusMat=Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
PARALLEL_FOR_LOOP
for(auto site=0;site<vol;site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0;s1<Ls;s1++){
SiteChi =zero;
for(int s2=0;s2<Ls;s2++){
int lex2 = s2+Ls*site;
if ( PplusMat(s1,s2) != 0.0 ) {
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi,PplusMat (s1,s2)*SitePplus);
}
if ( PminusMat(s1,s2) != 0.0 ) {
spProj5m(SitePminus,psi[lex2]);
accumRecon5m(SiteChi,PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi;
}
}
// For the non-vectorised s-direction we have more work to do in an alternate implementation
//
// ai = Mij bj
//
// where b == {b0} {b1}..
//
// Best to load/bcast b0
//
// ai = Mi0 b0
// for(int i=0;i<Ls;i++){
// ai += Mij bj
// }
//
// For rb5d shamir DWF, the Moo is proportional to the identity
// For rb4d Moo, we need to set a vector of coeffs, and use a stencil type construct, which is cheap.
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeLDUInv (const FermionField &psi, FermionField &chi)
{
chi.checkerboard=psi.checkerboard;
int Ls=this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
}
// U_m^{-1} D^{-1}
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
}
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
// Apply U^{-1}
for (int s=Ls-2;s>=0;s--){
axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls]
}
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeLDUInvDag (const FermionField &psi, FermionField &chi)
{
chi.checkerboard=psi.checkerboard;
int Ls=this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
}
// U_m^{-\dagger}
for (int s=0;s<Ls-1;s++){
axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
}
// L_m^{-\dagger} D^{-dagger}
for (int s=0;s<Ls-1;s++){
axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
}
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
// Apply L^{-dagger}
for (int s=Ls-2;s>=0;s--){
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
}
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
@ -597,7 +381,6 @@ PARALLEL_FOR_LOOP
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d;
}
}