1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Rewrite for performance. Impl dependent instantiations give

4d linalg impls of the 5d hopping terms (and inverse)
Cache friendly loop orderings of the above
Dense matrix stored and apply to the above

-- Switch to Ls vectorised, and use dense matrix approach for the MooeeInv
   and rotate/shift of the Mooee M5D routines.
This commit is contained in:
paboyle 2016-07-14 23:58:15 +01:00
parent fb45eb2eb2
commit 79a8ca1a62

View File

@ -28,7 +28,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Eigen/Dense>
#include <Grid.h>
@ -49,556 +48,340 @@ namespace QCD {
FourDimGrid,
FourDimRedBlackGrid,_M5,p),
mass(_mass)
{
}
{ }
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
// Assemble Din
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
// Din = bs psi[s] + cs[s] psi[s+1}
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
// Din+= -mass*cs[s] psi[s+1}
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
} else {
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
}
}
template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag (Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<RealD> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<RealD> diag = bs;
std::vector<RealD> upper= cs;
std::vector<RealD> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper);
}
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = beo;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i];
lower[i]=-ceo[i];
}
template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
} else {
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
}
}
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-cee[i];
lower[i]=-cee[i];
}
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,chi,lower,diag,upper);
}
// override multiply
template<class Impl>
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
FermionField Din(psi._grid);
// Assemble Din
Meooe5D(psi,Din);
this->DW(Din,chi,DaggerNo);
// ((b D_W + D_w hop terms +1) on s-diag
axpby(chi,1.0,1.0,chi,psi);
// Call Mooee??
for(int s=0;s<Ls;s++){
if ( s==0 ){
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0);
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
} else {
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
}
}
return norm2(chi);
}
template<class Impl>
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// Under adjoint
//D1+ D1- P- -> D1+^dag P+ D2-^dag
//D2- P+ D2+ P-D1-^dag D2+dag
FermionField Din(psi._grid);
// Apply Dw
this->DW(psi,Din,DaggerYes);
MeooeDag5D(Din,chi);
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
// Collect the terms in DW
// Chi = bs Din[s] + cs[s] Din[s+1}
// Chi+= -mass*cs[s] psi[s+1}
// FIXME just call MooeeDag??
// Collect the terms indept of DW
if ( s==0 ){
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0);
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
} else {
axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
}
}
// ((b D_W + D_w hop terms +1) on s-diag
axpby (chi,1.0,1.0,chi,psi);
return norm2(chi);
}
// half checkerboard operations
template<class Impl>
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField tmp(psi._grid);
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
Meooe5D(psi,tmp);
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
this->DhopEO(tmp,chi,DaggerNo);
if ( s==0 ) {
upper[s] = -cee[s+1] ;
lower[s] = mass*cee[Ls-1];
} else if ( s==(Ls-1)) {
upper[s] = mass*cee[0];
lower[s] = -cee[s-1];
} else {
this->DhopOE(tmp,chi,DaggerNo);
upper[s]=-cee[s+1];
lower[s]=-cee[s-1];
}
}
template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
FermionField tmp(psi._grid);
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
this->DhopEO(psi,tmp,DaggerYes);
} else {
this->DhopOE(psi,tmp,DaggerYes);
}
M5Ddag(psi,psi,chi,lower,diag,upper);
}
MeooeDag5D(tmp,chi);
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag(Ls,1.0);
std::vector<RealD> upper(Ls,-1.0);
std::vector<RealD> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<RealD> diag =bs;
std::vector<RealD> upper=cs;
std::vector<RealD> lower=cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,psi,Din,lower,diag,upper);
}
template<class Impl>
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField Din(psi._grid);
// Assemble Din
Meooe5D(psi,Din);
this->DW(Din,chi,DaggerNo);
// ((b D_W + D_w hop terms +1) on s-diag
axpby(chi,1.0,1.0,chi,psi);
M5D(psi,chi);
return(norm2(chi));
}
template<class Impl>
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// Under adjoint
//D1+ D1- P- -> D1+^dag P+ D2-^dag
//D2- P+ D2+ P-D1-^dag D2+dag
FermionField Din(psi._grid);
// Apply Dw
this->DW(psi,Din,DaggerYes);
MeooeDag5D(Din,chi);
M5Ddag(psi,chi);
// ((b D_W + D_w hop terms +1) on s-diag
axpby (chi,1.0,1.0,chi,psi);
return norm2(chi);
}
// half checkerboard operations
template<class Impl>
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField tmp(psi._grid);
Meooe5D(psi,tmp);
if ( psi.checkerboard == Odd ) {
this->DhopEO(tmp,chi,DaggerNo);
} else {
this->DhopOE(tmp,chi,DaggerNo);
}
}
template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
for (int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,mass*cee[s],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(chi,bee[s],psi,mass*cee[s],psi,s,0);
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
} else {
axpby_ssp_pminus(chi,bee[s],psi,-cee[s],psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
}
}
FermionField temp(psi._grid);
this->MooeeDenseInternal(psi,temp,DaggerNo,InverseNo);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
FermionField tmp(psi._grid);
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
this->DhopEO(psi,tmp,DaggerYes);
} else {
this->DhopOE(psi,tmp,DaggerYes);
}
MeooeDag5D(tmp,chi);
}
template<class Impl>
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls=this->Ls;
FermionField tmp(psi._grid);
// Assemble the 5d matrix
for(int s=0;s<Ls;s++){
if ( s==0 ) {
// tmp = bs psi[s] + cs[s] psi[s+1}
// tmp+= -mass*cs[s] psi[s+1}
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
} else {
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
}
}
// Apply 4d dslash fragment
this->DhopDir(tmp,chi,dir,disp);
template<class Impl>
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
FermionField tmp(psi._grid);
Meo5D(psi,tmp);
// Apply 4d dslash fragment
this->DhopDir(tmp,chi,dir,disp);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDeriv(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDeriv(mat,Din,V,dag);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
if ( s==0 ) {
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1] ,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,mass*cee[Ls-1],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus(chi,bee[s],psi,mass*cee[0],psi,s,0);
axpby_ssp_pminus(chi,1.0,chi,-cee[s-1],psi,s,s-1);
} else {
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1],psi,s,s+1);
axpby_ssp_pminus(chi,1.0 ,chi,-cee[s-1],psi,s,s-1);
}
}
FermionField temp(psi._grid);
this->MooeeDenseInternal(psi,temp,DaggerYes,InverseNo);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
FermionField temp(psi._grid);
this->MooeeLDUInv(psi,chi);
this->MooeeDenseInv(psi,temp);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
FermionField temp(psi._grid);
this->MooeeLDUInvDag(psi,chi);
this->MooeeDenseInvDag(psi,temp);
temp = temp - chi;
std::cout << "Difference between dense and normal "<< norm2(temp) <<std::endl;
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDenseInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeDenseInternal(psi,chi,DaggerYes,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDenseInv(const FermionField &psi, FermionField &chi)
{
this->MooeeDenseInternal(psi,chi,DaggerNo,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDenseInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
int Ls=this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard=psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = bee[s];
Pminus(s,s)= bee[s];
}
for(int s=0;s<Ls-1;s++){
Pminus(s,s+1) = -cee[s];
}
for(int s=0;s<Ls-1;s++){
Pplus(s+1,s) = -cee[s];
}
std::cout << " Pplus "<<Pplus<<std::endl;
std::cout << " Pminus "<<Pminus<<std::endl;
Pplus (0,Ls-1) = mass*cee[0];
Pminus(Ls-1,0) = mass*cee[Ls-1];
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if ( inv ) {
PplusMat =Pplus.inverse();
PminusMat=Pminus.inverse();
} else {
PplusMat =Pplus;
PminusMat=Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
PARALLEL_FOR_LOOP
for(auto site=0;site<vol;site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0;s1<Ls;s1++){
SiteChi =zero;
for(int s2=0;s2<Ls;s2++){
int lex2 = s2+Ls*site;
if ( PplusMat(s1,s2) != 0.0 ) {
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi,PplusMat (s1,s2)*SitePplus);
}
if ( PminusMat(s1,s2) != 0.0 ) {
spProj5m(SitePminus,psi[lex2]);
accumRecon5m(SiteChi,PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi;
}
}
// For the non-vectorised s-direction we have more work to do in an alternate implementation
//
// ai = Mij bj
//
// where b == {b0} {b1}..
//
// Best to load/bcast b0
//
// ai = Mi0 b0
// for(int i=0;i<Ls;i++){
// ai += Mij bj
// }
//
// For rb5d shamir DWF, the Moo is proportional to the identity
// For rb4d Moo, we need to set a vector of coeffs, and use a stencil type construct, which is cheap.
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeLDUInv (const FermionField &psi, FermionField &chi)
{
chi.checkerboard=psi.checkerboard;
int Ls=this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
}
// U_m^{-1} D^{-1}
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
}
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
// Apply U^{-1}
for (int s=Ls-2;s>=0;s--){
axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls]
}
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeLDUInvDag (const FermionField &psi, FermionField &chi)
{
chi.checkerboard=psi.checkerboard;
int Ls=this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
}
// U_m^{-\dagger}
for (int s=0;s<Ls-1;s++){
axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
}
// L_m^{-\dagger} D^{-dagger}
for (int s=0;s<Ls-1;s++){
axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
}
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
// Apply L^{-dagger}
for (int s=Ls-2;s>=0;s--){
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
}
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDeriv(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDeriv(mat,Din,V,dag);
}
};
template<class Impl>
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivOE(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
};
template<class Impl>
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivOE(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDerivOE(mat,Din,V,dag);
}
};
template<class Impl>
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivEO(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDerivEO(mat,Din,V,dag);
}
};
}
};
template<class Impl>
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
// Tanh
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
SetCoefficientsZolotarev(1.0,zdata,b,c);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivEO(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDerivEO(mat,Din,V,dag);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
int Ls=this->Ls;
};
// Tanh
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
SetCoefficientsZolotarev(1.0,zdata,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
int Ls=this->Ls;
///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec)
///////////////////////////////////////////////////////////
omega.resize(Ls);
bs.resize(Ls);
cs.resize(Ls);
as.resize(Ls);
///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec)
///////////////////////////////////////////////////////////
omega.resize(Ls);
bs.resize(Ls);
cs.resize(Ls);
as.resize(Ls);
//
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
//
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
//
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
//
// So
//
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
//
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
//
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
//
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
//
// So
//
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
double bpc = b+c;
double bmc = b-c;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
}
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
bee.resize(Ls);
cee.resize(Ls);
beo.resize(Ls);
ceo.resize(Ls);
for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i];
}
aee.resize(Ls);
aeo.resize(Ls);
for(int i=0;i<Ls;i++){
aee[i]=cee[i];
aeo[i]=ceo[i];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
dee.resize(Ls);
lee.resize(Ls);
leem.resize(Ls);
uee.resize(Ls);
ueem.resize(Ls);
for(int i=0;i<Ls;i++){
dee[i] = bee[i];
if ( i < Ls-1 ) {
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
leem[i]=mass*cee[Ls-1]/bee[0];
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
ueem[i]=mass;
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
ueem[i]*= aee[0]/bee[0];
} else {
lee[i] =0.0;
leem[i]=0.0;
uee[i] =0.0;
ueem[i]=0.0;
}
}
{
double delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d;
}
double bpc = b+c;
double bmc = b-c;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
}
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
bee.resize(Ls);
cee.resize(Ls);
beo.resize(Ls);
ceo.resize(Ls);
for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i];
}
aee.resize(Ls);
aeo.resize(Ls);
for(int i=0;i<Ls;i++){
aee[i]=cee[i];
aeo[i]=ceo[i];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
dee.resize(Ls);
lee.resize(Ls);
leem.resize(Ls);
uee.resize(Ls);
ueem.resize(Ls);
for(int i=0;i<Ls;i++){
dee[i] = bee[i];
if ( i < Ls-1 ) {
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
leem[i]=mass*cee[Ls-1]/bee[0];
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
ueem[i]=mass;
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
ueem[i]*= aee[0]/bee[0];
} else {
lee[i] =0.0;
leem[i]=0.0;
uee[i] =0.0;
ueem[i]=0.0;
}
}
{
double delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d;
}
}