1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

More re-import of Mobius EOFA

This commit is contained in:
David Murphy 2017-08-17 19:28:53 -04:00
parent e140b3f802
commit ac9e6b63c0
2 changed files with 261 additions and 229 deletions

View File

@ -347,9 +347,7 @@ namespace QCD {
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){
return; // Not vectorised in 5th direction
}
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);

View File

@ -133,7 +133,36 @@ namespace QCD {
}
template<class Impl>
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ }
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD m = this->mq1;
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD DtInv_p(0.0), DtInv_m(0.0);
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
FermionField tmp = zero;
for(int s=0; s<Ls; ++s){
for(int sp=0; sp<Ls; ++sp){
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
if(dag){
RealD tmp(DtInv_p);
DtInv_p = DtInv_m;
DtInv_m = tmp;
}
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
}}
}
/*****************************************************************************************************/
@ -172,298 +201,303 @@ namespace QCD {
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int Ls = this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
// no shift term
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
// fused M + shift operation
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
int Ls = this->Ls;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1 + shiftp;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1 + shiftm;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
#if(0)
std::cout << GridLogMessage << "MobiusEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5Ddag(psi, chi, chi, lower, diag, upper);
// fused M + shift operation
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
// half checkerboard operations
template<class Impl>
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
// coefficients of Mooee
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] *= -this->mq1;
lower[0] *= -this->mq1;
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dm;
lower[0] = this->dp;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
this->M5D(psi, psi, chi, lower, diag, upper);
// fused M + shift operation
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
// coefficients of MooeeDag
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
if(s==0) {
upper[s] = -this->cee[s+1];
lower[s] = this->mq1*this->cee[Ls-1];
} else if(s==(Ls-1)) {
upper[s] = this->mq1*this->cee[0];
lower[s] = -this->cee[s-1];
} else {
upper[s] = -this->cee[s+1];
lower[s] = -this->cee[s-1];
}
upper[Ls-1] = this->dp;
lower[0] = this->dm;
}
this->M5Ddag(psi, psi, chi, lower, diag, upper);
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
/****************************************************************************************/
//Zolo
// Computes coefficients for applying Cayley preconditioned shift operators
// (Mooee + \Delta) --> Mooee_shift
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
// For the latter two cases, the operation takes the form
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
template<class Impl>
void MobiusEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
{
int Ls = this->Ls;
int pm = this->pm;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
RealD shift = this->shift;
int Ls = this->Ls;
int pm = this->pm;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
RealD shift = this->shift;
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
this->bs.resize(Ls);
this->cs.resize(Ls);
this->aee.resize(Ls);
this->aeo.resize(Ls);
this->bee.resize(Ls);
this->beo.resize(Ls);
this->cee.resize(Ls);
this->ceo.resize(Ls);
// Initialize
Mooee_shift.resize(Ls);
MooeeInv_shift_lc.resize(Ls);
MooeeInv_shift_norm.resize(Ls);
MooeeInvDag_shift_lc.resize(Ls);
MooeeInvDag_shift_norm.resize(Ls);
for(int i=0; i<Ls; ++i){
this->bee[i] = 4.0 - this->M5 + 1.0;
this->cee[i] = 1.0;
}
// Construct Mooee_shift
int idx(0);
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
idx = (pm == 1) ? (s) : (Ls-1-s);
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
}
for(int i=0; i<Ls; ++i){
this->aee[i] = this->cee[i];
this->bs[i] = this->beo[i] = 1.0;
this->cs[i] = this->ceo[i] = 0.0;
}
// Tridiagonal solve for MooeeInvDag_shift_lc
{
Coeff_t m(0.0);
std::vector<Coeff_t> d = Mooee_shift;
std::vector<Coeff_t> u(Ls,0.0);
std::vector<Coeff_t> y(Ls,0.0);
std::vector<Coeff_t> q(Ls,0.0);
if(pm == 1){ u[0] = 1.0; }
else{ u[Ls-1] = 1.0; }
//////////////////////////////////////////
// EOFA shift terms
//////////////////////////////////////////
// Tridiagonal matrix algorithm + Sherman-Morrison formula
//
// We solve
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
// where Mooee' is the tridiagonal part of Mooee_{+}, and
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
// so that the outer-product u \otimes v gives the (0,Ls-1)
// entry of Mooee_{+}.
//
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
// and then construct the solution to the original system
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
if(pm == 1){
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
this->dm = mq1*this->cee[Ls-1];
} else if(this->pm == -1) {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
} else {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1];
for(int s=1; s<Ls; ++s){
m = -this->cee[s] / this->bee[s-1];
d[s] -= m*d[s-1];
u[s] -= m*u[s-1];
}
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
this->dee.resize(Ls+1);
this->lee.resize(Ls);
this->leem.resize(Ls);
this->uee.resize(Ls);
this->ueem.resize(Ls);
for(int i=0; i<Ls; ++i){
if(i < Ls-1){
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
this->leem[i] = this->dm/this->bee[i];
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
this->dee[i] = this->bee[i];
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
this->ueem[i] = this->dp / this->bee[0];
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
for(int s=Ls-2; s>=0; --s){
if(pm == 1){
y[s] = d[s] / this->bee[s];
q[s] = u[s] / this->bee[s];
} else {
this->lee[i] = 0.0;
this->leem[i] = 0.0;
this->uee[i] = 0.0;
this->ueem[i] = 0.0;
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
}
}
{
Coeff_t delta_d = 1.0 / this->bee[0];
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
// Construct MooeeInvDag_shift_lc
for(int s=0; s<Ls; ++s){
if(pm == 1){
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
} else {
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
}
}
int inv = 1;
this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
// Compute remaining coefficients
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
for(int s=0; s<Ls; ++s){
// MooeeInv_shift_lc
if(pm == 1){ MooeeInv_shift_lc[s] = std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s); }
else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); }
// MooeeInv_shift_norm
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N;
// MooeeInvDag_shift_norm
if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
}
}
}
// Recompute Cayley-form coefficients for different shift
// Recompute coefficients for a different value of shift constant
template<class Impl>
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
this->shift = new_shift;
if(new_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
int Ls = this->Ls;
Mooee_shift.resize(Ls,0.0);
MooeeInv_shift_lc.resize(Ls,0.0);
MooeeInv_shift_norm.resize(Ls,0.0);
MooeeInvDag_shift_lc.resize(Ls,0.0);
MooeeInvDag_shift_norm.resize(Ls,0.0);
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
int Ls = this->Ls;
int Ls = this->Ls;
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){
return; // Not vectorised in 5th direction
}
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
Pplus(s+1,s) = -this->cee[s+1];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->mq1*this->cee[0];
Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1];
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
#if(0)
std::cout << GridLogMessage << "Pplus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pplus(s,ss) << "\t";
}
std::cout << std::endl;
}
std::cout << GridLogMessage << "Pminus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pminus(s,ss) << "\t";
}
std::cout << std::endl;
}
#endif
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
if(this->shift != 0.0){
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) );
if(this->pm == 1) {
for(int s=0; s<Ls; ++s){
Pplus(s,Ls-1) += N * std::pow(-1.0,s) * std::pow(c-d,s) / std::pow(c+d,Ls+s+1);
}
} else {
PplusMat = Pplus;
PminusMat = Pminus;
for(int s=0; s<Ls; ++s){
Pminus(s,0) += N * std::pow(-1.0,s+1) * std::pow(c-d,Ls-1-s) / std::pow(c+d,2*Ls-s);
}
}
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(MobiusEOFAFermion);
GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
FermOpTemplateInstantiate(MobiusEOFAFermion);
GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
}}