mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

This commit is contained in:
Azusa Yamaguchi 2017-10-10 14:26:44 +01:00
commit f1fa00b71b
97 changed files with 12471 additions and 228 deletions

TODO

@@ -3,19 +3,19 @@ TODO:
Large item work list:
1)- BG/Q port and check
1)- BG/Q port and check ; Andrew says ok.
2)- Christoph's local basis expansion Lanczos
3)- Precision conversion and sort out localConvert <-- partial
- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
--
3a)- RNG I/O in ILDG/SciDAC (minor)
3b)- Precision conversion and sort out localConvert <-- partial/easy
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
4)- Physical propagator interface
5)- Conserved currents
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
7)- HDCR resume
Recent DONE
-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O. <--- DONE
-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori
-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
-- GaugeFix into central location <-- DONE
-- SciDAC and ILDG metadata handling <-- DONE


@@ -40,7 +40,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;


@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;


@@ -93,7 +93,7 @@ int main (int argc, char ** argv)
std::cout << latt_size.back() << "\t\t";
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);


@@ -37,6 +37,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/Chebyshev.h>
#include <Grid/algorithms/approx/Remez.h>
#include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h>
@@ -44,31 +45,16 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
// Lanczos support
//#include <Grid/algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>
// Eigen/lanczos
// EigCg
// MCR
// Pcg
// Multishift CG
// Hdcg
// GCR
// etc..
// integrator/Leapfrog
// integrator/Omelyan
// integrator/ForceGradient
// montecarlo/hmc
// montecarlo/rhmc
// montecarlo/metropolis
// etc...
#endif


@@ -230,6 +230,7 @@ namespace Grid {
// Barrel shift and collect global pencil
std::vector<int> lcoor(Nd), gcoor(Nd);
result = source;
int pc = processor_coor[dim];
for(int p=0;p<processors[dim];p++) {
PARALLEL_REGION
{
@@ -240,7 +241,8 @@
for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,cbuf);
peekLocalSite(s,result,cbuf);
cbuf[dim]+=p*L;
cbuf[dim]+=((pc+p) % processors[dim])*L;
// cbuf[dim]+=p*L;
pokeLocalSite(s,pgbuf,cbuf);
}
}
@@ -278,7 +280,6 @@ namespace Grid {
flops+= flops_call*NN;
// writing out result
int pc = processor_coor[dim];
PARALLEL_REGION
{
std::vector<int> clbuf(Nd), cgbuf(Nd);
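The one-line fix in the first hunk above starts the barrel shift from this rank's own processor coordinate pc, so each rank deposits its slab at a globally consistent offset. A standalone sketch of the index arithmetic (hypothetical layout values, not Grid code):

#include <cstdio>

// Slot of local coordinate c at shift step p, for a rank at processor
// coordinate pc on a ring of procs ranks with local extent L.
int pencil_slot(int c, int p, int pc, int procs, int L) {
  return c + ((pc + p) % procs) * L;  // mirrors cbuf[dim] += ((pc+p) % processors[dim])*L
}

int main() {
  const int procs = 4, L = 8, pc = 2;  // hypothetical layout
  for (int p = 0; p < procs; p++)
    std::printf("step %d -> global offset %d\n", p, pencil_slot(0, p, pc, procs, L));
  return 0;
}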


@@ -162,15 +162,10 @@ namespace Grid {
_Mat.M(in,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
ComplexD dot;
_Mat.M(in,out);
dot= innerProduct(in,out);
n1=real(dot);
dot = innerProduct(out,out);
n2=real(dot);
ComplexD dot= innerProduct(in,out); n1=real(dot);
n2=norm2(out);
}
void HermOp(const Field &in, Field &out){
_Mat.M(in,out);
@@ -192,10 +187,10 @@
ni=Mpc(in,tmp);
no=MpcDag(tmp,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
MpcDagMpc(in,out,n1,n2);
}
void HermOp(const Field &in, Field &out){
virtual void HermOp(const Field &in, Field &out){
RealD n1,n2;
HermOpAndNorm(in,out,n1,n2);
}
@@ -212,7 +207,6 @@
void OpDir (const Field &in, Field &out,int dir,int disp) {
assert(0);
}
};
template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
@@ -270,7 +264,6 @@
return axpy_norm(out,-1.0,tmp,in);
}
};
template<class Matrix,class Field>
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
protected:
@@ -299,6 +292,45 @@
return axpy_norm(out,-1.0,tmp,in);
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
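Restating the two systems from the comment above in LaTeX for readability (same content, editor's transcription):

\begin{align*}
\text{LH:}\quad & \left(1 - M_{oo}^{-1} M_{oe} M_{ee}^{-1} M_{eo}\right)\psi_o = M_{oo}^{-1}\eta_o \\
\text{RH:}\quad & \left(1 - M_{oe} M_{ee}^{-1} M_{eo} M_{oo}^{-1}\right)\phi_o = \eta_o, \qquad \psi_o = M_{oo}^{-1}\phi_o
\end{align*}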
///////////////////////////////////////////////////////////////////////////////////////////////////
// Staggered use
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class Matrix,class Field>
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
public:
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
n2 = Mpc(in,out);
ComplexD dot= innerProduct(in,out);
n1 = real(dot);
}
virtual void HermOp(const Field &in, Field &out){
Mpc(in,out);
}
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
_Mat.Meooe(in,tmp);
_Mat.MooeeInv(tmp,out);
_Mat.MeooeDag(out,tmp);
_Mat.Mooee(in,out);
return axpy_norm(out,-1.0,tmp,out);
}
virtual RealD MpcDag (const Field &in, Field &out){
return Mpc(in,out);
}
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
assert(0);// Never need with staggered
}
};
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
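As a toy check of the composition inside Mpc above, scalar stand-ins for the checkerboard blocks reproduce Moo - Moe Mee^{-1} Meo (numbers are hypothetical; the toy ignores the staggered sign in MeooeDag):

#include <cstdio>

int main() {
  double Mee = 4.0, Moo = 4.0, Meo = 1.0, Moe = 1.0; // hypothetical blocks
  double in  = 1.0;
  double tmp = Meo * in;        // _Mat.Meooe(in,tmp)
  double out = tmp / Mee;       // _Mat.MooeeInv(tmp,out)
  tmp = Moe * out;              // _Mat.MeooeDag(out,tmp)  (toy: symmetric)
  out = Moo * in;               // _Mat.Mooee(in,out)
  out = -1.0 * tmp + out;       // axpy_norm(out,-1.0,tmp,out)
  std::printf("Mpc*in = %g (expect %g)\n", out, Moo - Moe*Meo/Mee);
  return 0;
}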
/////////////////////////////////////////////////////////////


@@ -8,6 +8,7 @@
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -193,6 +194,47 @@ namespace Grid {
return sum;
};
RealD approxD(RealD x)
{
RealD Un;
RealD Unm;
RealD Unp;
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
RealD U0=1;
RealD U1=2*y;
RealD sum;
sum = Coeffs[1]*U0;
sum+= Coeffs[2]*U1*2.0;
Un =U1;
Unm=U0;
for(int i=2;i<order-1;i++){
Unp=2*y*Un-Unm;
Unm=Un;
Un =Unp;
sum+= Un*Coeffs[i+1]*(i+1.0);
}
return sum/(0.5*(hi-lo));
};
RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
RealD x = x0;
RealD eps;
int i;
for (i=0;i<maxiter;i++) {
eps = approx(x) - z;
if (fabs(eps / z) < resid)
return x;
x = x - eps / approxD(x);
}
return std::numeric_limits<double>::quiet_NaN();
}
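approxInv above is a plain Newton iteration on approx(x) - z, with approxD supplying the derivative. The same scheme on a toy f(x) = x^2 (stand-ins for approx/approxD; tolerances hypothetical):

#include <cstdio>
#include <cmath>
#include <limits>

double f (double x){ return x*x; }   // stand-in for approx(x)
double fd(double x){ return 2.0*x; } // stand-in for approxD(x)

double invert(double z, double x0, int maxiter, double resid) {
  double x = x0;
  for (int i=0;i<maxiter;i++) {
    double eps = f(x) - z;
    if (std::fabs(eps/z) < resid) return x; // relative residual test
    x -= eps / fd(x);                       // Newton update
  }
  return std::numeric_limits<double>::quiet_NaN(); // signal failure, as above
}

int main(){ std::printf("invert(2) = %.12f\n", invert(2.0, 1.0, 50, 1e-12)); }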
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {


@@ -0,0 +1,152 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/approx/Forecast.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef INCLUDED_FORECAST_H
#define INCLUDED_FORECAST_H
namespace Grid {
// Abstract base class.
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
// and returns a forecasted solution to the system D*psi = phi (psi).
template<class Matrix, class Field>
class Forecast
{
public:
virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
};
// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
// used to forecast solutions across poles of the EOFA heatbath.
//
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
template<class Matrix, class Field>
class ChronoForecast : public Forecast<Matrix,Field>
{
public:
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
{
int degree = prev_solns.size();
Field chi(phi); // forecasted solution
// Trivial cases
if(degree == 0){ chi = zero; return chi; }
else if(degree == 1){ return prev_solns[0]; }
RealD dot;
ComplexD xp;
Field r(phi); // residual
Field Mv(phi);
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
std::vector<Field> MdagMv(degree,phi);
// Array to hold the matrix elements
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
// Solution and source vectors
std::vector<ComplexD> a(degree);
std::vector<ComplexD> b(degree);
// Orthonormalize the vector basis
for(int i=0; i<degree; i++){
v[i] *= 1.0/std::sqrt(norm2(v[i]));
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
}
// Perform sparse matrix multiplication and construct rhs
for(int i=0; i<degree; i++){
b[i] = innerProduct(v[i],phi);
Mat.M(v[i],Mv);
Mat.Mdag(Mv,MdagMv[i]);
G[i][i] = innerProduct(v[i],MdagMv[i]);
}
// Construct the matrix
for(int j=0; j<degree; j++){
for(int k=j+1; k<degree; k++){
G[j][k] = innerProduct(v[j],MdagMv[k]);
G[k][j] = std::conj(G[j][k]);
}}
// Gauss-Jordan elimination with partial pivoting
for(int i=0; i<degree; i++){
// Perform partial pivoting
int k = i;
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
if(k != i){
xp = b[k];
b[k] = b[i];
b[i] = xp;
for(int j=0; j<degree; j++){
xp = G[k][j];
G[k][j] = G[i][j];
G[i][j] = xp;
}
}
// Convert matrix to upper triangular form
for(int j=i+1; j<degree; j++){
xp = G[j][i]/G[i][i];
b[j] -= xp * b[i];
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
}
}
// Use Gaussian elimination to solve equations and calculate initial guess
chi = zero;
r = phi;
for(int i=degree-1; i>=0; i--){
a[i] = 0.0;
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
a[i] = (b[i]-a[i])/G[i][i];
chi += a[i]*v[i];
r -= a[i]*MdagMv[i];
}
RealD true_r(0.0);
ComplexD tmp;
for(int i=0; i<degree; i++){
tmp = -b[i];
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
tmp = std::conj(tmp)*tmp;
true_r += std::sqrt(tmp.real());
}
RealD error = std::sqrt(norm2(r)/norm2(phi));
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
return chi;
};
};
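The forecast above Galerkin-projects Mdag M chi = phi onto the span of the orthonormalised previous solutions. A one-vector toy on R^2 (matrix and values are hypothetical stand-ins for the Dirac operator and fields), showing the defining property that the residual is orthogonal to the basis:

#include <cstdio>
#include <cmath>
#include <vector>

using Vec = std::vector<double>;
Vec A(const Vec& x){ return {2*x[0] + x[1], x[0] + 3*x[1]}; } // symmetric toy A
double dot(const Vec& a, const Vec& b){ return a[0]*b[0] + a[1]*b[1]; }

int main() {
  Vec phi = {1.0, -1.0};
  Vec v   = {1.0,  0.5};               // one previous solution
  double nv = std::sqrt(dot(v,v));
  v = {v[0]/nv, v[1]/nv};              // orthonormalise the basis
  Vec AtAv = A(A(v));                  // MdagMv analogue (A symmetric)
  double a = dot(v,phi) / dot(v,AtAv); // solve the 1x1 system G a = b
  Vec r = {phi[0] - a*AtAv[0], phi[1] - a*AtAv[1]};
  std::printf("a = %g, <v,r> = %g (Galerkin orthogonality)\n", a, dot(v,r));
  return 0;
}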
}
#endif


@@ -0,0 +1,754 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BIRL_H
#define GRID_BIRL_H
#include <string.h> //memset
#include <zlib.h>
#include <sys/stat.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldVectorIO.h>
namespace Grid {
/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////
template<class Field>
class BlockImplicitlyRestartedLanczos {
const RealD small = 1.0e-16;
public:
int lock;
int get;
int Niter;
int converged;
int Nminres; // Minimum number of restarts; only check for convergence after this many
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged evecs sought
int Np; // Np -- Number of spare vecs in Krylov space
int Nm; // Nm -- total number of vectors
int orth_period;
RealD OrthoTime;
RealD eresid, betastp;
SortEigen<Field> _sort;
LinearFunction<Field> &_HermOp;
LinearFunction<Field> &_HermOpTest;
/////////////////////////
// Constructor
/////////////////////////
BlockImplicitlyRestartedLanczos(
LinearFunction<Field> & HermOp,
LinearFunction<Field> & HermOpTest,
int _Nstop, // sought vecs
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
RealD _betastp, // if beta(k) < betastp: converged
int _Niter, // Max iterations
int _Nminres, int _orth_period = 1) :
_HermOp(HermOp),
_HermOpTest(HermOpTest),
Nstop(_Nstop),
Nk(_Nk),
Nm(_Nm),
eresid(_eresid),
betastp(_betastp),
Niter(_Niter),
Nminres(_Nminres),
orth_period(_orth_period)
{
Np = Nm-Nk; assert(Np>0);
};
BlockImplicitlyRestartedLanczos(
LinearFunction<Field> & HermOp,
LinearFunction<Field> & HermOpTest,
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
RealD _betastp, // if beta(k) < betastp: converged
int _Niter, // Max iterations
int _Nminres,
int _orth_period = 1) :
_HermOp(HermOp),
_HermOpTest(HermOpTest),
Nstop(_Nk),
Nk(_Nk),
Nm(_Nm),
eresid(_eresid),
betastp(_betastp),
Niter(_Niter),
Nminres(_Nminres),
orth_period(_orth_period)
{
Np = Nm-Nk; assert(Np>0);
};
/* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 = 0, v0 = 0
2. For k = 1,2,...,m Do:
3. wk := A vk − βk v_{k−1}
4. αk := (wk, vk)
5. wk := wk − αk vk // wk orthogonal to vk
6. βk+1 := ‖wk‖2. If βk+1 = 0 then Stop
7. vk+1 := wk / βk+1
8. EndDo
*/
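A standalone rendering of steps 1-8 on a 3x3 symmetric matrix (hypothetical tridiagonal Laplacian; the step() below runs the same recursion on lattice Fields):

#include <cstdio>
#include <cmath>
#include <vector>

using Vec = std::vector<double>;
Vec Amul(const Vec& v){ return {2*v[0]-v[1], -v[0]+2*v[1]-v[2], -v[1]+2*v[2]}; }
double dot(const Vec& a, const Vec& b){ return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]; }

int main(){
  const int m = 3;
  std::vector<Vec> v(m+1, Vec(3,0.0));                 // v[0] = 0
  std::vector<double> alpha(m+1,0.0), beta(m+2,0.0);   // beta[1] = 0
  double s = 1.0/std::sqrt(3.0);
  v[1] = {s,s,s};                                      // 1. unit 2-norm v1
  for(int k=1;k<=m;k++){
    Vec w = Amul(v[k]);                                // 3. wk := A vk - βk v_{k-1}
    for(int i=0;i<3;i++) w[i] -= beta[k]*v[k-1][i];
    alpha[k] = dot(w,v[k]);                            // 4. αk := (wk,vk)
    for(int i=0;i<3;i++) w[i] -= alpha[k]*v[k][i];     // 5. orthogonalise against vk
    beta[k+1] = std::sqrt(dot(w,w));                   // 6. βk+1 := ||wk||2
    if (beta[k+1] == 0.0) break;
    if (k < m) for(int i=0;i<3;i++) v[k+1][i] = w[i]/beta[k+1]; // 7.
    std::printf("alpha[%d]=%g beta[%d]=%g\n", k, alpha[k], k+1, beta[k+1]);
  }
  return 0;
}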
void step(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
BasisFieldVector<Field>& evec,
Field& w,int Nm,int k)
{
assert( k< Nm );
GridStopWatch gsw_op,gsw_o;
Field& evec_k = evec[k];
gsw_op.Start();
_HermOp(evec_k,w);
gsw_op.Stop();
if(k>0){
w -= lme[k-1] * evec[k-1];
}
ComplexD zalph = innerProduct(evec_k,w); // 4. αk := (wk, vk)
RealD alph = real(zalph);
w = w - alph * evec_k; // 5. wk := wk − αk vk
RealD beta = normalise(w); // 6. βk+1 := ‖wk‖2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
std::cout<<GridLogMessage << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
const RealD tiny = 1.0e-20;
if ( beta < tiny ) {
std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl;
}
lmd[k] = alph;
lme[k] = beta;
gsw_o.Start();
if (k>0 && k % orth_period == 0) {
orthogonalize(w,evec,k); // orthonormalise
}
gsw_o.Stop();
if(k < Nm-1) {
evec[k+1] = w;
}
std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() <<
" orth=" << gsw_o.Elapsed() << std::endl;
}
void qr_decomp(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int Nk,
int Nm,
std::vector<RealD>& Qt,
RealD Dsh,
int kmin,
int kmax)
{
int k = kmin-1;
RealD x;
RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
RealD c = ( lmd[k] -Dsh) *Fden;
RealD s = -lme[k] *Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
x =-s*lme[k+1];
lme[k+1] = c*lme[k+1];
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 - s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 + c*Qtmp2;
}
// Givens transformations
for(int k = kmin; k < kmax-1; ++k){
RealD Fden = 1.0/hypot(x,lme[k-1]);
RealD c = lme[k-1]*Fden;
RealD s = - x*Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
lme[k-1] = c*lme[k-1] -s*x;
if(k != kmax-2){
x = -s*lme[k+1];
lme[k+1] = c*lme[k+1];
}
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 -s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 +c*Qtmp2;
}
}
}
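Both loops above use the same Givens pattern: (c,s) = (a,-b)/hypot(a,b) rotates a 2-vector so that one component vanishes. A standalone sketch with hypothetical values:

#include <cstdio>
#include <cmath>

int main(){
  double a = 3.0, b = 4.0;            // hypothetical (diagonal, off-diagonal) pair
  double den = 1.0/std::hypot(a,b);
  double c = a*den, s = -b*den;
  std::printf("rotated: (%g, %g)\n", c*a - s*b, s*a + c*b); // (5, 0)
  return 0;
}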
#ifdef USE_LAPACK_IRL
#define LAPACK_INT int
//long long
void diagonalize_lapack(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N1,
int N2,
std::vector<RealD>& Qt,
GridBase *grid){
std::cout << GridLogMessage << "diagonalize_lapack start\n";
GridStopWatch gsw;
const int size = Nm;
// tevals.resize(size);
// tevecs.resize(size);
LAPACK_INT NN = N1;
std::vector<double> evals_tmp(NN);
std::vector<double> evec_tmp(NN*NN);
memset(&evec_tmp[0],0,sizeof(double)*NN*NN);
// double AA[NN][NN];
std::vector<double> DD(NN);
std::vector<double> EE(NN);
for (int i = 0; i< NN; i++)
for (int j = i - 1; j <= i + 1; j++)
if ( j < NN && j >= 0 ) {
if (i==j) DD[i] = lmd[i];
if (i==j) evals_tmp[i] = lmd[i];
if (j==(i-1)) EE[j] = lme[j];
}
LAPACK_INT evals_found;
LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
LAPACK_INT liwork = 3+NN*10 ;
std::vector<LAPACK_INT> iwork(liwork);
std::vector<double> work(lwork);
std::vector<LAPACK_INT> isuppz(2*NN);
char jobz = 'V'; // calculate evals & evecs
char range = 'I'; // calculate evals in the index interval [il,iu]
// char range = 'A'; // calculate all evals
char uplo = 'U'; // refer to upper half of original matrix
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
std::vector<int> ifail(NN);
LAPACK_INT info;
// int total = QMP_get_number_of_nodes();
// int node = QMP_get_node_number();
// GridBase *grid = evec[0]._grid;
int total = grid->_Nprocessors;
int node = grid->_processor;
int interval = (NN/total)+1;
double vl = 0.0, vu = 0.0;
LAPACK_INT il = interval*node+1 , iu = interval*(node+1);
if (iu > NN) iu=NN;
double tol = 0.0;
if (1) {
memset(&evals_tmp[0],0,sizeof(double)*NN);
if ( il <= NN){
std::cout << GridLogMessage << "dstegr started" << std::endl;
gsw.Start();
dstegr(&jobz, &range, &NN,
(double*)&DD[0], (double*)&EE[0],
&vl, &vu, &il, &iu, // these four are ignored if second parameter is 'A'
&tol, // tolerance
&evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN,
&isuppz[0],
&work[0], &lwork, &iwork[0], &liwork,
&info);
gsw.Stop();
std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl;
for (int i = iu-1; i>= il-1; i--){
evals_tmp[i] = evals_tmp[i - (il-1)];
if (il>1) evals_tmp[i-(il-1)]=0.;
for (int j = 0; j< NN; j++){
evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j];
if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.;
}
}
}
{
// QMP_sum_double_array(evals_tmp,NN);
// QMP_sum_double_array((double *)evec_tmp,NN*NN);
grid->GlobalSumVector(&evals_tmp[0],NN);
grid->GlobalSumVector(&evec_tmp[0],NN*NN);
}
}
// Cheating a bit: it would be better to sort than simply reverse, but the routine's documentation says evals are returned in increasing order, while qr gives evals in decreasing order.
for(int i=0;i<NN;i++){
for(int j=0;j<NN;j++)
Qt[(NN-1-i)*N2+j]=evec_tmp[i*NN + j];
lmd [NN-1-i]=evals_tmp[i];
}
std::cout << GridLogMessage << "diagonalize_lapack complete\n";
}
#undef LAPACK_INT
#endif
void diagonalize(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N2,
int N1,
std::vector<RealD>& Qt,
GridBase *grid)
{
#ifdef USE_LAPACK_IRL
const int check_lapack=0; // just use lapack if 0, check against lapack if 1
if(!check_lapack)
return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
std::vector <RealD> lmd2(N1);
std::vector <RealD> lme2(N1);
std::vector<RealD> Qt2(N1*N1);
for(int k=0; k<N1; ++k){
lmd2[k] = lmd[k];
lme2[k] = lme[k];
}
for(int k=0; k<N1*N1; ++k)
Qt2[k] = Qt[k];
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
#endif
int Niter = 10000*N1;
int kmin = 1;
int kmax = N2;
// (this should be more sophisticated)
for(int iter=0; ; ++iter){
if ( (iter+1)%(100*N1)==0)
std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n";
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
// Convergence criterion (redef of kmin and kmax)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
Niter = iter;
#ifdef USE_LAPACK_IRL
if(check_lapack){
const double SMALL=1e-8;
diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
std::vector <RealD> lmd3(N2);
for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
_sort.push(lmd3,N2);
_sort.push(lmd2,N2);
for(int k=0; k<N2; ++k){
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
}
for(int k=0; k<N1*N1; ++k){
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
}
}
#endif
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
}
}
std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
abort();
}
#if 1
template<typename T>
static RealD normalise(T& v)
{
RealD nn = norm2(v);
nn = sqrt(nn);
v = v * (1.0/nn);
return nn;
}
void orthogonalize(Field& w,
BasisFieldVector<Field>& evec,
int k)
{
double t0=-usecond()/1e6;
evec.orthogonalize(w,k);
normalise(w);
t0+=usecond()/1e6;
OrthoTime +=t0;
}
void setUnit_Qt(int Nm, std::vector<RealD> &Qt) {
for(int i=0; i<Qt.size(); ++i) Qt[i] = 0.0;
for(int k=0; k<Nm; ++k) Qt[k + k*Nm] = 1.0;
}
/* Rudy Arthur's thesis pp. 137
------------------------
Require: M > K, P = M − K
Compute the factorization A V_M = V_M H_M + f_M e_M^†
repeat
  Q = I
  for i = 1,...,P do
    Q_i R_i = H_M − θ_i I ; Q = Q Q_i
    H_M = Q_i^† H_M Q_i
  end for
  β_K = H_M(K+1,K) ; σ_K = Q(M,K)
  r = v_{K+1} β_K + r σ_K
  V_K = V_M(1:M) Q(1:M,1:K)
  H_K = H_M(1:K,1:K)
  A V_K = V_K H_K + f_K e_K^† ; extend to an M = K + P step factorization A V_M = V_M H_M + f_M e_M^†
until convergence
*/
void calc(std::vector<RealD>& eval,
BasisFieldVector<Field>& evec,
const Field& src,
int& Nconv,
bool reverse,
int SkipTest)
{
GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid;
assert(grid == src._grid);
std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl;
std::cout<<GridLogMessage << " -- size of eval = " << eval.size() << std::endl;
std::cout<<GridLogMessage << " -- size of evec = " << evec.size() << std::endl;
assert(Nm <= evec.size() && Nm <= eval.size());
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
RealD evalMaxApprox = 0.0;
{
auto src_n = src;
auto tmp = src;
const int _MAX_ITER_IRL_MEVAPP_ = 50;
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
_HermOpTest(src_n,tmp);
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
i=_MAX_ITER_IRL_MEVAPP_;
evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp;
}
}
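The loop above is a power iteration with a Rayleigh-quotient stopping test at 5%. The same scheme on a 2x2 symmetric matrix (hypothetical entries):

#include <cstdio>
#include <cmath>

int main(){
  double x0 = 1, x1 = 1;                 // src_n
  double ev = 0;                         // evalMaxApprox
  for (int i=0;i<50;i++){
    double t0 = 3*x0 + x1, t1 = x0 + 2*x1;        // tmp = A src_n
    double na = (x0*t0 + x1*t1)/(x0*x0 + x1*x1);  // Rayleigh quotient vnum/vden
    if (std::fabs(ev/na - 1.0) < 0.05) { ev = na; break; }
    ev = na; x0 = t0; x1 = t1;                    // src_n = tmp
  }
  std::printf("largest eigenvalue ~ %g (exact ~3.618)\n", ev);
  return 0;
}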
std::vector<RealD> lme(Nm);
std::vector<RealD> lme2(Nm);
std::vector<RealD> eval2(Nm);
std::vector<RealD> eval2_copy(Nm);
std::vector<RealD> Qt(Nm*Nm);
Field f(grid);
Field v(grid);
int k1 = 1;
int k2 = Nk;
Nconv = 0;
RealD beta_k;
// Set initial vector
evec[0] = src;
normalise(evec[0]);
std::cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0])<<std::endl;
// Initial Nk steps
OrthoTime=0.;
double t0=usecond()/1e6;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
double t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
t1=usecond()/1e6;
// Restarting loop begins
for(int iter = 0; iter<Niter; ++iter){
std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl;
//
// Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs.
// We loop over
//
OrthoTime=0.;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
f *= lme[Nm-1];
t1=usecond()/1e6;
// getting eigenvalues
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k+k1-1];
lme2[k] = lme[k+k1-1];
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// sorting
eval2_copy = eval2;
_sort.push(eval2,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// Implicitly shifted QR transformations
setUnit_Qt(Nm,Qt);
for(int ip=0; ip<k2; ++ip){
std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl;
}
for(int ip=k2; ip<Nm; ++ip){
std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
assert(k2<Nm);
assert(k1>0);
evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
fflush(stdout);
// Compressed vector f and beta(k2)
f *= Qt[Nm-1+Nm*(k2-1)];
f += lme[k2-1] * evec[k2];
beta_k = norm2(f);
beta_k = sqrt(beta_k);
std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k;
evec[k2] = betar * f;
lme[k2-1] = beta_k;
// Convergence test
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k];
lme2[k] = lme[k];
std::cout<<GridLogMessage << "eval2[" << k << "] = " << eval2[k] << std::endl;
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
Nconv = 0;
if (iter >= Nminres) {
std::cout << GridLogMessage << "Rotation to test convergence " << std::endl;
Field ev0_orig(grid);
ev0_orig = evec[0];
evec.rotate(Qt,0,Nk,0,Nk,Nm);
{
std::cout << GridLogMessage << "Test convergence" << std::endl;
Field B(grid);
for(int j = 0; j<Nk; j+=SkipTest){
B=evec[j];
//std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl;
B.checkerboard = evec[0].checkerboard;
_HermOpTest(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
eval2[j] = vnum/vden;
v -= eval2[j]*B;
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] "
<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv
<<" "<< vnum/(sqrt(vden)*sqrt(vv0))
<< " norm(B["<<j<<"])="<< vden <<std::endl;
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
if((vv<eresid*eresid) && (j == Nconv) ){
Nconv+=SkipTest;
}
}
// test if we converged, if so, terminate
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl;
if( Nconv>=Nstop || beta_k < betastp){
goto converged;
}
std::cout << GridLogMessage << "Rotate back" << std::endl;
//B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss];
{
Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk);
for (int k=0;k<Nk;k++)
for (int j=0;j<Nk;j++)
qm(j,k) = Qt[k+Nm*j];
GridStopWatch timeInv;
timeInv.Start();
Eigen::MatrixXd qmI = qm.inverse();
timeInv.Stop();
std::vector<RealD> QtI(Nm*Nm);
for (int k=0;k<Nk;k++)
for (int j=0;j<Nk;j++)
QtI[k+Nm*j] = qmI(j,k);
RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 )
assert(res_check_rotate_inverse < 1e-7);
evec.rotate(QtI,0,Nk,0,Nk,Nm);
axpy(ev0_orig,-1.0,evec[0],ev0_orig);
std::cout << GridLogMessage << "Rotation done (in " << timeInv.Elapsed() << " = " << timeInv.useconds() << " us" <<
", error = " << res_check_rotate_inverse <<
"); | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl;
}
}
} else {
std::cout << GridLogMessage << "iter < Nminres: do not yet test for convergence\n";
} // end of iter loop
}
std::cout<<GridLogMessage<<"\n NOT converged.\n";
abort();
converged:
if (SkipTest == 1) {
eval = eval2;
} else {
// test quickly
for (int j=0;j<Nstop;j+=SkipTest) {
std::cout<<GridLogMessage << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl;
}
eval2_copy.resize(eval2.size());
eval = eval2_copy;
}
evec.sortInPlace(eval,reverse);
{
// test
for (int j=0;j<Nstop;j++) {
std::cout<<GridLogMessage << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl;
}
}
//_sort.push(eval,evec,Nconv);
//evec.sort(eval,Nconv);
std::cout<<GridLogMessage << "\n Converged\n Summary :\n";
std::cout<<GridLogMessage << " -- Iterations = "<< Nconv << "\n";
std::cout<<GridLogMessage << " -- beta(k) = "<< beta_k << "\n";
std::cout<<GridLogMessage << " -- Nconv = "<< Nconv << "\n";
}
#endif
};
}
#endif


@@ -0,0 +1,143 @@
namespace Grid {
/*
BlockProjector
If _HP_BLOCK_PROJECTORS_ is defined, we assume that _evec is a basis that is not
fully orthonormalized (to the precision of the coarse field) and we allow for higher-precision
coarse field than basis field.
*/
//#define _HP_BLOCK_PROJECTORS_
template<typename Field>
class BlockProjector {
public:
BasisFieldVector<Field>& _evec;
BlockedGrid<Field>& _bgrid;
BlockProjector(BasisFieldVector<Field>& evec, BlockedGrid<Field>& bgrid) : _evec(evec), _bgrid(bgrid) {
}
void createOrthonormalBasis(RealD thres = 0.0) {
GridStopWatch sw;
sw.Start();
int cnt = 0;
#pragma omp parallel shared(cnt)
{
int lcnt = 0;
#pragma omp for
for (int b=0;b<_bgrid._o_blocks;b++) {
for (int i=0;i<_evec._Nm;i++) {
auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]);
// |i> -= <j|i> |j>
for (int j=0;j<i;j++) {
_bgrid.block_caxpy(b,_evec._v[i],-_bgrid.block_sp(b,_evec._v[j],_evec._v[i]),_evec._v[j],_evec._v[i]);
}
auto nrm = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]);
auto eps = nrm/nrm0;
if (Reduce(eps).real() < thres) {
lcnt++;
}
// TODO: if norm is too small, remove this eigenvector/mark as not needed; in practice: set it to zero norm here and return a mask
// that is then used later to decide not to write certain eigenvectors to disk (add a norm calculation before subtraction step and look at nrm/nrm0 < eps to decide)
_bgrid.block_cscale(b,1.0 / sqrt(nrm),_evec._v[i]);
}
}
#pragma omp critical
{
cnt += lcnt;
}
}
sw.Stop();
std::cout << GridLogMessage << "Gram-Schmidt to create blocked basis took " << sw.Elapsed() << " (" << ((RealD)cnt / (RealD)_bgrid._o_blocks / (RealD)_evec._Nm)
<< " below threshold)" << std::endl;
}
template<typename CoarseField>
void coarseToFine(const CoarseField& in, Field& out) {
out = zero;
out.checkerboard = _evec._v[0].checkerboard;
int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]);
assert(Nbasis == _evec._Nm);
#pragma omp parallel for
for (int b=0;b<_bgrid._o_blocks;b++) {
for (int j=0;j<_evec._Nm;j++) {
_bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out);
}
}
}
template<typename CoarseField>
void fineToCoarse(const Field& in, CoarseField& out) {
out = zero;
int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]);
assert(Nbasis == _evec._Nm);
Field tmp(_bgrid._grid);
tmp = in;
#pragma omp parallel for
for (int b=0;b<_bgrid._o_blocks;b++) {
for (int j=0;j<_evec._Nm;j++) {
// |rhs> -= <j|rhs> |j>
auto c = _bgrid.block_sp(b,_evec._v[j],tmp);
_bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable
out._odata[b]._internal._internal[j] = c;
}
}
}
template<typename CoarseField>
void deflateFine(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) {
result = zero;
for (int i=0;i<N;i++) {
Field tmp(result._grid);
coarseToFine(_coef._v[i],tmp);
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
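deflateFine above assembles the guess as sum_i <v_i,src>/eval_i * v_i. A scalar sketch with one hypothetical exact eigenpair of A = diag(2,5):

#include <cstdio>

int main(){
  double v[2] = {1.0, 0.0};   // eigenvector, eval = 2
  double b[2] = {3.0, 4.0};   // source
  double coef = (v[0]*b[0] + v[1]*b[1]) / 2.0;   // <v,b>/eval
  std::printf("deflated guess = (%g, %g)\n", coef*v[0], coef*v[1]); // (1.5, 0)
  return 0;
}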
template<typename CoarseField>
void deflateCoarse(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) {
CoarseField src_coarse(_coef._v[0]._grid);
CoarseField result_coarse = src_coarse;
result_coarse = zero;
fineToCoarse(src_orig,src_coarse);
for (int i=0;i<N;i++) {
axpy(result_coarse,TensorRemove(innerProduct(_coef._v[i],src_coarse)) / eval[i],_coef._v[i],result_coarse);
}
coarseToFine(result_coarse,result);
}
template<typename CoarseField>
void deflate(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) {
// Deflation on coarse Grid is much faster, so use it by default. Deflation on fine Grid is kept for legacy reasons for now.
deflateCoarse(_coef,eval,N,src_orig,result);
}
};
}


@@ -0,0 +1,401 @@
namespace Grid {
template<typename Field>
class BlockedGrid {
public:
GridBase* _grid;
typedef typename Field::scalar_type Coeff_t;
typedef typename Field::vector_type vCoeff_t;
std::vector<int> _bs; // block size
std::vector<int> _nb; // number of blocks
std::vector<int> _l; // local dimensions irrespective of cb
std::vector<int> _l_cb; // local dimensions of checkerboarded vector
std::vector<int> _l_cb_o; // local dimensions of inner checkerboarded vector
std::vector<int> _bs_cb; // block size in checkerboarded vector
std::vector<int> _nb_o; // number of blocks of simd o-sites
int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites;
BlockedGrid(GridBase* grid, const std::vector<int>& block_size) :
_grid(grid), _bs(block_size), _nd((int)_bs.size()),
_nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size),
_l_cb_o(block_size), _bs_cb(block_size) {
_blocks = 1;
_o_blocks = 1;
_l = grid->FullDimensions();
_l_cb = grid->LocalDimensions();
_l_cb_o = grid->_rdimensions;
_cf_size = 1;
_block_sites = 1;
for (int i=0;i<_nd;i++) {
_l[i] /= grid->_processors[i];
assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize
int r = _l[i] / _l_cb[i];
assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize
_bs_cb[i] = _bs[i] / r;
_block_sites *= _bs_cb[i];
_nb[i] = _l[i] / _bs[i];
_nb_o[i] = _nb[i] / _grid->_simd_layout[i];
if (_nb[i] % _grid->_simd_layout[i]) { // simd must accommodate choice of blocksize
std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl;
assert(0);
}
_blocks *= _nb[i];
_o_blocks *= _nb_o[i];
_cf_size *= _l[i];
}
_cf_size *= 12 / 2;
_cf_block_size = _cf_size / _blocks;
_cf_o_block_size = _cf_size / _o_blocks;
std::cout << GridLogMessage << "BlockedGrid:" << std::endl;
std::cout << GridLogMessage << " _l = " << _l << std::endl;
std::cout << GridLogMessage << " _l_cb = " << _l_cb << std::endl;
std::cout << GridLogMessage << " _l_cb_o = " << _l_cb_o << std::endl;
std::cout << GridLogMessage << " _bs = " << _bs << std::endl;
std::cout << GridLogMessage << " _bs_cb = " << _bs_cb << std::endl;
std::cout << GridLogMessage << " _nb = " << _nb << std::endl;
std::cout << GridLogMessage << " _nb_o = " << _nb_o << std::endl;
std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl;
std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl;
std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl;
std::cout << GridLogMessage << " _cf_size = " << _cf_size << std::endl;
std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl;
std::cout << GridLogMessage << " _block_sites = " << _block_sites << std::endl;
std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl;
// _grid->Barrier();
//abort();
}
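The constructor's geometry arithmetic in one dimension, as a standalone sketch (hypothetical extents chosen to satisfy the asserts above):

#include <cstdio>

int main(){
  int l = 8, l_cb = 4, bs = 4, simd = 2; // local extent, cb extent, block, simd lanes
  int r     = l / l_cb;    // checkerboard reduction factor
  int bs_cb = bs / r;      // block size in the checkerboarded vector
  int nb    = l / bs;      // number of blocks
  int nb_o  = nb / simd;   // blocks of simd o-sites
  std::printf("r=%d bs_cb=%d nb=%d nb_o=%d\n", r, bs_cb, nb, nb_o);
  return 0;
}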
void block_to_coor(int b, std::vector<int>& x0) {
std::vector<int> bcoor;
bcoor.resize(_nd);
x0.resize(_nd);
assert(b < _o_blocks);
Lexicographic::CoorFromIndex(bcoor,b,_nb_o);
int i;
for (i=0;i<_nd;i++) {
x0[i] = bcoor[i]*_bs_cb[i];
}
//std::cout << GridLogMessage << "Map block b -> " << x0 << std::endl;
}
void block_site_to_o_coor(const std::vector<int>& x0, std::vector<int>& coor, int i) {
Lexicographic::CoorFromIndex(coor,i,_bs_cb);
for (int j=0;j<_nd;j++)
coor[j] += x0[j];
}
int block_site_to_o_site(const std::vector<int>& x0, int i) {
std::vector<int> coor; coor.resize(_nd);
block_site_to_o_coor(x0,coor,i);
Lexicographic::IndexFromCoor(coor,i,_l_cb_o);
return i;
}
vCoeff_t block_sp(int b, const Field& x, const Field& y) {
std::vector<int> x0;
block_to_coor(b,x0);
vCoeff_t ret = 0.0;
for (int i=0;i<_block_sites;i++) { // only odd sites
int ss = block_site_to_o_site(x0,i);
ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss]));
}
return ret;
}
vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) {
std::vector<int> x0;
block_to_coor(b,x0);
constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t);
int lsize = _cf_o_block_size / _block_sites;
std::vector< ComplexD > ret(nsimd);
for (int i=0;i<nsimd;i++)
ret[i] = 0.0;
for (int i=0;i<_block_sites;i++) { // only odd sites
int ss = block_site_to_o_site(x0,i);
int n = lsize / nsimd;
for (int l=0;l<n;l++) {
for (int j=0;j<nsimd;j++) {
int t = lsize * i + l*nsimd + j;
ret[j] += conjugate(((Coeff_t*)&x._odata[ss]._internal)[l*nsimd + j]) * y[t];
}
}
}
vCoeff_t vret;
for (int i=0;i<nsimd;i++)
((Coeff_t*)&vret)[i] = (Coeff_t)ret[i];
return vret;
}
template<class T>
void vcaxpy(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x,const iScalar<T>& y) {
vcaxpy(r._internal,a,x._internal,y._internal);
}
template<class T,int N>
void vcaxpy(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x,const iVector<T,N>& y) {
for (int i=0;i<N;i++)
vcaxpy(r._internal[i],a,x._internal[i],y._internal[i]);
}
void vcaxpy(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x,const vCoeff_t& y) {
r = a*x + y;
}
void block_caxpy(int b, Field& ret, const vCoeff_t& a, const Field& x, const Field& y) {
std::vector<int> x0;
block_to_coor(b,x0);
for (int i=0;i<_block_sites;i++) { // only odd sites
int ss = block_site_to_o_site(x0,i);
vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]);
}
}
void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) {
std::vector<int> x0;
block_to_coor(b,x0);
constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t);
int lsize = _cf_o_block_size / _block_sites;
for (int i=0;i<_block_sites;i++) { // only odd sites
int ss = block_site_to_o_site(x0,i);
int n = lsize / nsimd;
for (int l=0;l<n;l++) {
vCoeff_t r = a* ((vCoeff_t*)&x._odata[ss]._internal)[l];
for (int j=0;j<nsimd;j++) {
int t = lsize * i + l*nsimd + j;
ret[t] = y[t] + ((Coeff_t*)&r)[j];
}
}
}
}
void block_set(int b, Field& ret, const std::vector< ComplexD >& x) {
std::vector<int> x0;
block_to_coor(b,x0);
int lsize = _cf_o_block_size / _block_sites;
for (int i=0;i<_block_sites;i++) { // only odd sites
int ss = block_site_to_o_site(x0,i);
for (int l=0;l<lsize;l++)
((Coeff_t*)&ret._odata[ss]._internal)[l] = (Coeff_t)x[lsize * i + l]; // convert precision
}
}
void block_get(int b, const Field& ret, std::vector< ComplexD >& x) {
std::vector<int> x0;
block_to_coor(b,x0);
int lsize = _cf_o_block_size / _block_sites;
for (int i=0;i<_block_sites;i++) { // only odd sites
int ss = block_site_to_o_site(x0,i);
for (int l=0;l<lsize;l++)
x[lsize * i + l] = (ComplexD)((Coeff_t*)&ret._odata[ss]._internal)[l];
}
}
template<class T>
void vcscale(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x) {
vcscale(r._internal,a,x._internal);
}
template<class T,int N>
void vcscale(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x) {
for (int i=0;i<N;i++)
vcscale(r._internal[i],a,x._internal[i]);
}
void vcscale(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x) {
r = a*x;
}
void block_cscale(int b, const vCoeff_t& a, Field& ret) {
std::vector<int> x0;
block_to_coor(b,x0);
for (int i=0;i<_block_sites;i++) { // only odd sites
int ss = block_site_to_o_site(x0,i);
vcscale(ret._odata[ss],a,ret._odata[ss]);
}
}
void getCanonicalBlockOffset(int cb, std::vector<int>& x0) {
const int ndim = 5;
assert(_nb.size() == ndim);
std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] };
std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] };
x0.resize(ndim);
assert(cb >= 0);
assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]);
Lexicographic::CoorFromIndex(x0,cb,_nbc);
int i;
for (i=0;i<ndim;i++) {
x0[i] *= _bsc[i];
}
//if (cb < 2)
// std::cout << GridLogMessage << "Map: " << cb << " To: " << x0 << std::endl;
}
void pokeBlockOfVectorCanonical(int cb,Field& v,const std::vector<float>& buf) {
std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] };
std::vector<int> ldim = v._grid->LocalDimensions();
std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] };
const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4];
// take canonical block cb of v and put it in canonical ordering in buf
std::vector<int> cx0;
getCanonicalBlockOffset(cb,cx0);
#pragma omp parallel
{
std::vector<int> co0,cl0;
co0=cx0; cl0=cx0;
#pragma omp for
for (int i=0;i<_nbsc;i++) {
Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo
for (int j=0;j<(int)_bsc.size();j++)
cl0[j] = cx0[j] + co0[j];
std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] };
int oi = v._grid->oIndex(l0);
int ii = v._grid->iIndex(l0);
int lti = i;
//if (cb < 2 && i<2)
// std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl;
for (int s=0;s<4;s++)
for (int c=0;c<3;c++) {
Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii];
int ti = 12*lti + 3*s + c;
ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]);
}
}
}
}
void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector<float>& buf) {
std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] };
std::vector<int> ldim = v._grid->LocalDimensions();
std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] };
const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4];
// take canonical block cb of v and put it in canonical ordering in buf
std::vector<int> cx0;
getCanonicalBlockOffset(cb,cx0);
buf.resize(_cf_block_size * 2);
#pragma omp parallel
{
std::vector<int> co0,cl0;
co0=cx0; cl0=cx0;
#pragma omp for
for (int i=0;i<_nbsc;i++) {
Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo
for (int j=0;j<(int)_bsc.size();j++)
cl0[j] = cx0[j] + co0[j];
std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] };
int oi = v._grid->oIndex(l0);
int ii = v._grid->iIndex(l0);
int lti = i;
//if (cb < 2 && i<2)
// std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl;
for (int s=0;s<4;s++)
for (int c=0;c<3;c++) {
Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii];
int ti = 12*lti + 3*s + c;
buf[2*ti+0] = ld.real();
buf[2*ti+1] = ld.imag();
}
}
}
}
int globalToLocalCanonicalBlock(int slot,const std::vector<int>& src_nodes,int nb) {
// processor coordinate
int _nd = (int)src_nodes.size();
std::vector<int> _src_nodes = src_nodes;
std::vector<int> pco(_nd);
Lexicographic::CoorFromIndex(pco,slot,_src_nodes);
std::vector<int> cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] };
// get local block
std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] };
assert(_nd == 5);
std::vector<int> c_src_local_blocks(_nd);
for (int i=0;i<_nd;i++) {
assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 0);
c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i];
}
std::vector<int> cbcoor(_nd); // coordinate of block in slot in canonical form
Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks);
// cpco, cbcoor
std::vector<int> clbcoor(_nd);
for (int i=0;i<_nd;i++) {
int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate
int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid
int tpcoor = _grid->_processor_coor[(i+1)%5];
if (pcoor != tpcoor)
return -1;
clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i
}
int lnb;
Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc);
//std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl;
return lnb;
}
};
}


@@ -0,0 +1,163 @@
namespace Grid {
template<class Field>
class BasisFieldVector {
public:
int _Nm;
typedef typename Field::scalar_type Coeff_t;
typedef typename Field::vector_type vCoeff_t;
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
std::vector<Field> _v; // _Nfull vectors
void report(int n,GridBase* value) {
std::cout << GridLogMessage << "BasisFieldVector allocated:\n";
std::cout << GridLogMessage << " Delta N = " << n << "\n";
std::cout << GridLogMessage << " Size of full vectors (size) = " <<
((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n";
std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl;
value->Barrier();
if (value->IsBoss()) {
system("cat /proc/meminfo");
}
value->Barrier();
}
BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) {
report(Nm,value);
}
~BasisFieldVector() {
}
Field& operator[](int i) {
return _v[i];
}
void orthogonalize(Field& w, int k) {
for(int j=0; j<k; ++j){
Coeff_t ip = (Coeff_t)innerProduct(_v[j],w);
w = w - ip*_v[j];
}
}
void rotate(std::vector<RealD>& Qt,int j0, int j1, int k0,int k1,int Nm) {
GridBase* grid = _v[0]._grid;
#pragma omp parallel
{
std::vector < vobj > B(Nm);
#pragma omp for
for(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt[k+Nm*j] * _v[k]._odata[ss];
}
}
for(int j=j0; j<j1; ++j){
_v[j]._odata[ss] = B[j];
}
}
}
}
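rotate() forms B[j] = sum_k Qt[k + Nm*j] * v[k] site by site. The same row-major contraction on scalars (hypothetical Qt that swaps the two basis vectors):

#include <cstdio>

int main(){
  const int Nm = 2;
  double Qt[Nm*Nm] = {0,1, 1,0};   // hypothetical rotation: swap
  double v[Nm] = {10, 20}, B[Nm];
  for (int j=0;j<Nm;j++){
    B[j] = 0;
    for (int k=0;k<Nm;k++) B[j] += Qt[k+Nm*j]*v[k];
  }
  std::printf("B = (%g, %g)\n", B[0], B[1]); // (20, 10)
  return 0;
}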
size_t size() const {
return _Nm;
}
void resize(int n) {
if (n > _Nm)
_v.reserve(n);
_v.resize(n,_v[0]._grid);
if (n < _Nm)
_v.shrink_to_fit();
report(n - _Nm,_v[0]._grid);
_Nm = n;
}
std::vector<int> getIndex(std::vector<RealD>& sort_vals) {
std::vector<int> idx(sort_vals.size());
iota(idx.begin(), idx.end(), 0);
// sort indexes based on comparing values in v
sort(idx.begin(), idx.end(),
[&sort_vals](int i1, int i2) {return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);});
return idx;
}
void reorderInPlace(std::vector<RealD>& sort_vals, std::vector<int>& idx) {
GridStopWatch gsw;
gsw.Start();
int nswaps = 0;
for (size_t i=0;i<idx.size();i++) {
if (idx[i] != i) {
// find proper place (this could be done in logarithmic time, don't bother for now)
size_t j;
for (j=i;j<idx.size();j++)
if (idx[j]==i)
break;
assert(j!=idx.size());
Field _t(_v[0]._grid);
_t = _v[idx[j]];
_v[idx[j]] = _v[idx[i]];
_v[idx[i]] = _t;
RealD _td = sort_vals[idx[j]];
sort_vals[idx[j]] = sort_vals[idx[i]];
sort_vals[idx[i]] = _td;
int _tt = idx[i];
idx[i] = idx[j];
idx[j] = _tt;
nswaps++;
}
}
// sort values
gsw.Stop();
std::cout << GridLogMessage << "Sorted eigenspace in place in " << gsw.Elapsed() << " using " << nswaps << " swaps" << std::endl;
}
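The same index-sort plus swap-based reorder on plain doubles (the real code swaps whole lattice Fields, which is why it applies the permutation with explicit swaps rather than copies):

#include <cstdio>
#include <cmath>
#include <vector>
#include <numeric>
#include <algorithm>

int main(){
  std::vector<double> vals = {3.0, -1.0, 2.0};
  std::vector<int> idx(vals.size());
  std::iota(idx.begin(), idx.end(), 0);
  std::sort(idx.begin(), idx.end(),
            [&](int a, int b){ return std::fabs(vals[a]) < std::fabs(vals[b]); });
  for (size_t i=0;i<idx.size();i++){
    if ((size_t)idx[i] != i) {
      size_t j = i;
      while (idx[j] != (int)i) j++;           // find proper place
      std::swap(vals[idx[j]], vals[idx[i]]);  // swap payloads
      std::swap(idx[i], idx[j]);              // record the swap
    }
  }
  for (double x : vals) std::printf("%g ", x); // -1 2 3
  std::printf("\n");
  return 0;
}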
void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) {
std::vector<int> idx = getIndex(sort_vals);
if (reverse)
std::reverse(idx.begin(), idx.end());
reorderInPlace(sort_vals,idx);
}
void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = zero;
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
};
}

File diff suppressed because it is too large.


@@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);


@@ -53,16 +53,110 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
* M psi = eta
***********************
*Odd
* i) (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
* i) D_oo psi_o = L^{-1} eta_o
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
*Even
* ii) Mee psi_e + Meo psi_o = src_e
*
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
*
*
* TODO: Other options:
*
* a) change checkerboards for Schur e<->o
*
* Left precon by Moo^-1
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_o = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
*
* Right precon by Moo^-1
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_o = M_oo^-dag (D_oo)^dag L^{-1} eta_o
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
* psi_o = M_oo^-1 phi_o
* TODO: Deflation
*/
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackStaggeredSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Moo * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
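A minimal usage sketch for the staggered solver above (assumed Grid action/field types; the tolerance and iteration count are illustrative, and any Matrix exposing the Meooe/Mooee/MooeeInv interface used above works the same way):

#include <Grid/Grid.h>
using namespace Grid;

// Sketch: wire an unpreconditioned CG into the red-black wrapper above.
template<class Action, class FermionField>
void solveRedBlack(Action &D, const FermionField &eta, FermionField &psi) {
  ConjugateGradient<FermionField> CG(1.0e-8, 10000);       // tol, max iters
  SchurRedBlackStaggeredSolve<FermionField> solver(CG);
  solver(D, eta, psi);  // solves M psi = eta via the odd-checkerboard system
}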
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
@@ -76,12 +170,10 @@ namespace Grid {
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0) : _HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
CBfactorise=cb;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
@@ -141,5 +233,166 @@ namespace Grid {
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMpc solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};
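A sketch of the algebra behind the extra MooeeInv on the solution (my reading of the code above, stated as an assumption rather than taken from this commit): the DiagTwo operator acts on the auxiliary variable y = M_{oo} x_o,

  \hat{M}_{\rm DiagTwo} \;=\; \mathbb{1} - M_{oe}\,M_{ee}^{-1}\,M_{eo}\,M_{oo}^{-1}
                       \;=\; \bigl( M_{oo} - M_{oe} M_{ee}^{-1} M_{eo} \bigr)\, M_{oo}^{-1},
  \qquad x_o = M_{oo}^{-1}\,y ,

so after the red-black solve returns y (tmp in the code), the MooeeInv call recovers the physical odd-checkerboard solution.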
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoMixed {
private:
LinearFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMpc solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
// _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_HermitianRBSolver(src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};
}
#endif
View File
@ -49,6 +49,8 @@ public:
template<class object> friend class Lattice;
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
GridBase(const std::vector<int> & processor_grid,
const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {};
// Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
@ -210,9 +212,6 @@ public:
assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
}
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
gidx=0;
int mult=1;
View File
@ -61,9 +61,29 @@ public:
virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
return shift;
}
/////////////////////////////////////////////////////////////////////////
// Constructor takes a parent grid and possibly subdivides communicator.
/////////////////////////////////////////////////////////////////////////
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const GridCartesian &parent) : GridBase(processor_grid,parent)
{
Init(dimensions,simd_layout,processor_grid);
}
/////////////////////////////////////////////////////////////////////////
// Construct from comm world
/////////////////////////////////////////////////////////////////////////
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid) : GridBase(processor_grid)
{
Init(dimensions,simd_layout,processor_grid);
}
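A hedged usage sketch of the new parent-grid constructor above (the lattice and processor layouts are assumptions for illustration, not taken from this commit):

  std::vector<int> latt = {16,16,16,32};
  std::vector<int> simd = GridDefaultSimd(Nd,vComplex::Nsimd());
  std::vector<int> mpi  = {1,1,2,4};    // parent decomposition: 8 ranks
  std::vector<int> sub  = {1,1,2,2};    // each child grid: 4 ranks, i.e. 2 sub-grids
  GridCartesian FullGrid (latt,simd,mpi);
  GridCartesian SplitGrid(latt,simd,sub,FullGrid);  // subdivides FullGrid's communicator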
void Init(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid)
{
///////////////////////
// Grid information
View File
@ -112,24 +112,57 @@ public:
}
};
GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {};
////////////////////////////////////////////////////////////
// Create Redblack from original grid; require full grid pointer ?
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base)
{
int dims = base->_ndimension;
std::vector<int> checker_dim_mask(dims,1);
int checker_dim = 0;
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim);
};
GridRedBlackCartesian(const std::vector<int> &dimensions,
////////////////////////////////////////////////////////////
// Create redblack from original grid, with non-trivial checker dim mask
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base,
const std::vector<int> &checker_dim_mask,
int checker_dim
) : GridBase(base->_processors,*base)
{
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ;
}
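A sketch of the masked constructor just above for a five-dimensional grid, where the s-direction is conventionally excluded from checkerboarding (the mask values and checker_dim chosen here are an assumption for illustration, not taken from this commit):

  // FiveDimGrid: a 5-d GridCartesian* built elsewhere; dims = {Ls,x,y,z,t}
  std::vector<int> cb_mask(FiveDimGrid->_ndimension,1);
  cb_mask[0] = 0;   // do not checker the fifth dimension
  int cb_dim = 1;   // checkerboard with respect to the x direction
  GridRedBlackCartesian FiveDimRBGrid(FiveDimGrid,cb_mask,cb_dim);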
#if 0
////////////////////////////////////////////////////////////
// Create redblack grid ; deprecated. Should not
// need direct creation of redblack without a full grid to base on
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base,
const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const std::vector<int> &checker_dim_mask,
int checker_dim
) : GridBase(processor_grid)
) : GridBase(processor_grid,*base)
{
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
}
GridRedBlackCartesian(const std::vector<int> &dimensions,
////////////////////////////////////////////////////////////
// Create redblack grid
////////////////////////////////////////////////////////////
GridRedBlackCartesian(const GridBase *base,
const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid) : GridBase(processor_grid)
const std::vector<int> &processor_grid) : GridBase(processor_grid,*base)
{
std::vector<int> checker_dim_mask(dimensions.size(),1);
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0);
int checker_dim = 0;
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
}
#endif
void Init(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
View File
@ -96,6 +96,113 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
GlobalSumVector((double *)c,2*N);
}
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
{
_ndimension = processors.size();
assert(_ndimension == parent._ndimension);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
int Nparent;
MPI_Comm_size(parent.communicator,&Nparent);
int childsize=1;
for(int d=0;d<processors.size();d++) {
childsize *= processors[d];
}
int Nchild = Nparent/childsize;
assert (childsize * Nchild == Nparent);
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
std::vector<int> scoor(_ndimension); // coor of split within parent
std::vector<int> ssize(_ndimension); // number of splits (children) per dimension
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent._processor_coor[d] % processors[d];
scoor[d] = parent._processor_coor[d] / processors[d];
ssize[d] = parent._processors[d]/ processors[d];
}
int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms
Lexicographic::IndexFromCoor(ccoor,crank,processors);
Lexicographic::IndexFromCoor(scoor,srank,ssize);
MPI_Comm comm_split;
if ( Nchild > 1 ) {
// std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
// std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
// for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
// std::cout<<std::endl;
// std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
// for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
// std::cout<<std::endl;
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
// std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
// << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
} else {
comm_split=parent.communicator;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Set up from the new split communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
}
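A worked instance of the ccoor/scoor arithmetic above (my own standalone illustration, not from this commit):

  // parent._processors = {4}, child processors = {2}  =>  ssize = {2}, Nchild = 2
  for(int p=0;p<4;p++){
    int ccoor = p % 2;  // rank within the child communicator
    int scoor = p / 2;  // which child communicator this rank joins
  }
  // => parent ranks {0,1} form child 0, ranks {2,3} form child 1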
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take an MPI_Comm and self assemble
//////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
// if ( communicator_base != communicator_world ) {
// std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
// }
_ndimension = processors.size();
_processor_coor.resize(_ndimension);
/////////////////////////////////
// Count the requested nodes
/////////////////////////////////
_Nprocessors=1;
_processors = processors;
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
std::vector<int> periodic(_ndimension,1);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
int Size;
MPI_Comm_size(communicator,&Size);
#ifdef GRID_COMMS_MPIT
communicator_halo.resize (2*_ndimension);
for(int i=0;i<_ndimension*2;i++){
MPI_Comm_dup(communicator,&communicator_halo[i]);
}
#endif
assert(Size==_Nprocessors);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
InitFromMPICommunicator(processors,communicator_world);
}
#endif
#if !defined( GRID_COMMS_MPI3)
int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
@ -147,8 +254,13 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
}
void CartesianCommunicator::ShmInitGeneric(void){
#if 1
int mmap_flag = MAP_SHARED | MAP_ANONYMOUS;
int mmap_flag =0;
#ifdef MAP_ANONYMOUS
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
#endif
#ifdef MAP_ANON
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
#endif
#ifdef MAP_HUGETLB
if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
#endif
View File
@ -83,6 +83,7 @@ class CartesianCommunicator {
std::vector<MPI_Comm> communicator_halo;
typedef MPI_Request CommsRequest_t;
#else
typedef int CommsRequest_t;
#endif
@ -149,10 +150,22 @@ class CartesianCommunicator {
static void Init(int *argc, char ***argv);
////////////////////////////////////////////////
// Constructor of any given grid
// Constructors to sub-divide a parent communicator
// and default to comm world
////////////////////////////////////////////////
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent);
CartesianCommunicator(const std::vector<int> &pdimensions_in);
private:
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
////////////////////////////////////////////////
// Private initialise from an MPI communicator
// Can use after an MPI_Comm_split, but hidden from user so private
////////////////////////////////////////////////
void InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base);
#endif
public:
////////////////////////////////////////////////////////////////////////////////////////
// Wraps MPI_Cart routines, or implements equivalent on other impls
////////////////////////////////////////////////////////////////////////////////////////
@ -250,6 +263,23 @@ class CartesianCommunicator {
////////////////////////////////////////////////////////////
void Broadcast(int root,void* data, int bytes);
////////////////////////////////////////////////////////////
// All2All down one dimension
////////////////////////////////////////////////////////////
template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){
assert(dim>=0);
assert(dim<_ndimension);
int numnode = _processors[dim];
// std::cerr << " AllToAll in.size() "<<in.size()<<std::endl;
// std::cerr << " AllToAll out.size() "<<out.size()<<std::endl;
assert(in.size()==out.size());
size_t bytes=(in.size()*sizeof(T))/numnode;
assert((bytes*numnode) == in.size()*sizeof(T));
AllToAll(dim,(void *)&in[0],(void *)&out[0],bytes);
}
void AllToAll(int dim ,void *in,void *out,int bytes);
void AllToAll(void *in,void *out,int bytes);
template<class obj> void Broadcast(int root,obj &data)
{
Broadcast(root,(void *)&data,sizeof(data));
View File
@ -52,29 +52,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
ShmInitGeneric();
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_ndimension = processors.size();
std::vector<int> periodic(_ndimension,1);
_Nprocessors=1;
_processors = processors;
_processor_coor.resize(_ndimension);
MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
int Size;
MPI_Comm_size(communicator,&Size);
assert(Size==_Nprocessors);
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -210,6 +187,21 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
root,
communicator);
assert(ierr==0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
// Split the communicator
row[dim] = _processors[dim];
CartesianCommunicator Comm(row,*this);
Comm.AllToAll(in,out,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,int bytes)
{
MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator);
}
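A minimal usage sketch of the templated front end declared in the header (buffer sizing is mine; the asserts require the payload to divide evenly over the ranks in that dimension):

  const int chunk = 16;                      // elements exchanged with each rank (illustrative)
  int dim = 0;
  int N   = grid->_processors[dim];          // ranks along this dimension
  std::vector<ComplexD> in(N*chunk), out(N*chunk);
  grid->AllToAll(dim,in,out);                // every rank scatters/gathers 'chunk' elements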
///////////////////////////////////////////////////////
// Should only be used prior to Grid Init finished.
@ -230,5 +222,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
assert(ierr==0);
}
}
View File
@ -450,6 +450,15 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
assert(lr!=-1);
Lexicographic::CoorFromIndex(coor,lr,_processors);
}
//////////////////////////////////
// Try to subdivide communicator
//////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors)
{
std::cout << "Attempts to split MPI3 communicators will fail until implemented" <<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
int ierr;
View File
@ -53,33 +53,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
ShmInitGeneric();
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_ndimension = processors.size();
std::vector<int> periodic(_ndimension,1);
_Nprocessors=1;
_processors = processors;
_processor_coor.resize(_ndimension);
MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
communicator_halo.resize (2*_ndimension);
for(int i=0;i<_ndimension*2;i++){
MPI_Comm_dup(communicator,&communicator_halo[i]);
}
int Size;
MPI_Comm_size(communicator,&Size);
assert(Size==_Nprocessors);
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
View File
@ -38,6 +38,9 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
ShmInitGeneric();
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors) {}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_processors = processors;
@ -95,6 +98,10 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
{
assert(0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
{
bcopy(in,out,bytes);
}
int CartesianCommunicator::RankWorld(void){return 0;}
void CartesianCommunicator::Barrier(void){}
View File
@ -75,6 +75,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
ShmInitGeneric();
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors)
{
std::cout << "Attempts to split SHMEM communicators will fail " <<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_ndimension = processors.size();
View File
@ -63,7 +63,7 @@ SOFTWARE.
#error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers"
#endif
#elif defined(__GNUC__)
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40900
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40805
#error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers"
#endif
#endif
View File
@ -544,7 +544,6 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
for(int i=0;i<Nblock;i++){
for(int j=0;j<Nblock;j++){
auto tmp = innerProduct(Left[i],Right[j]);
// vector_typeD rtmp = TensorRemove(tmp);
auto rtmp = TensorRemove(tmp);
mat_thread(i,j) += Reduce(rtmp);
}}
View File
@ -685,5 +685,306 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
}
}
////////////////////////////////////////////////////////////////////////////////
// Communicate between grids
////////////////////////////////////////////////////////////////////////////////
//
// All to all plan
//
// Subvolume on fine grid is v. Vectors a,b,c,d
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMPLEST CASE:
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Mesh of nodes (2) ; subdivide to 1 subdivisions
//
// Lex ord:
// N0 va0 vb0 N1 va1 vb1
//
// For each dimension do an all to all
//
// full AllToAll(0)
// N0 va0 va1 N1 vb0 vb1
//
// REARRANGE
// N0 va01 N1 vb01
//
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
// NB: Easiest to programme if keep in lex order.
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMPLE CASE:
///////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Mesh of nodes (2x2) ; subdivide to 1x1 subdivisions
//
// Lex ord:
// N0 va0 vb0 vc0 vd0 N1 va1 vb1 vc1 vd1
// N2 va2 vb2 vc2 vd2 N3 va3 vb3 vc3 vd3
//
// Ratio = full[dim] / split[dim]
//
// For each dimension do an all to all; get Nvec -> Nvec / ratio
// Ldim -> Ldim * ratio
// LocalVol -> LocalVol * ratio
// full AllToAll(0)
// N0 va0 vb0 va1 vb1 N1 vc0 vd0 vc1 vd1
// N2 va2 vb2 va3 vb3 N3 vc2 vd2 vc3 vd3
//
// REARRANGE
// N0 va01 vb01 N1 vc01 vd01
// N2 va23 vb23 N3 vc23 vd23
//
// full AllToAll(1) // Not what is wanted. FIXME
// N0 va01 va23 N1 vc01 vc23
// N2 vb01 vb23 N3 vd01 vd23
//
// REARRANGE
// N0 va0123 N1 vc0123
// N2 vb0123 N3 vd0123
//
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
// NB: Easiest to programme if keep in lex order.
//
/////////////////////////////////////////////////////////
template<class Vobj>
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
typedef typename Vobj::scalar_object Sobj;
int full_vecs = full.size();
assert(full_vecs>=1);
GridBase * full_grid = full[0]._grid;
GridBase *split_grid = split._grid;
int ndim = full_grid->_ndimension;
int full_nproc = full_grid->_Nprocessors;
int split_nproc =split_grid->_Nprocessors;
////////////////////////////////
// Checkerboard management
////////////////////////////////
int cb = full[0].checkerboard;
split.checkerboard = cb;
//////////////////////////////
// Checks
//////////////////////////////
assert(full_grid->_ndimension==split_grid->_ndimension);
for(int n=0;n<full_vecs;n++){
assert(full[n].checkerboard == cb);
for(int d=0;d<ndim;d++){
assert(full[n]._grid->_gdimensions[d]==split._grid->_gdimensions[d]);
assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]);
}
}
int nvector =full_nproc/split_nproc;
assert(nvector*split_nproc==full_nproc);
assert(nvector == full_vecs);
std::vector<int> ratio(ndim);
for(int d=0;d<ndim;d++){
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
int lsites = full_grid->lSites();
Integer sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
for(int v=0;v<nvector;v++){
unvectorizeToLexOrdArray(scalardata,full[v]);
parallel_for(int site=0;site<lsites;site++){
alldata[v*lsites+site] = scalardata[site];
}
}
int nvec = nvector; // Counts down to 1 as we collapse dims
std::vector<int> ldims = full_grid->_ldimensions;
std::vector<int> lcoor(ndim);
for(int d=0;d<ndim;d++){
if ( ratio[d] != 1 ) {
full_grid ->AllToAll(d,alldata,tmpdata);
//////////////////////////////////////////
//Local volume for this dimension is expanded by ratio of processor extents
// Number of vectors is decreased by same factor
// Rearrange to lexico for bigger volume
//////////////////////////////////////////
nvec /= ratio[d];
auto rdims = ldims; rdims[d] *= ratio[d];
auto rsites= lsites*ratio[d];
for(int v=0;v<nvec;v++){
// For loop over each site within old subvol
for(int lsite=0;lsite<lsites;lsite++){
Lexicographic::CoorFromIndex(lcoor, lsite, ldims);
for(int r=0;r<ratio[d];r++){ // ratio*nvec terms
auto rcoor = lcoor; rcoor[d] += r*ldims[d];
int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);
rsite += v * rsites;
int rmul=nvec*lsites;
int vmul= lsites;
alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
}
}
}
ldims[d]*= ratio[d];
lsites *= ratio[d];
if ( split_grid->_processors[d] > 1 ) {
tmpdata = alldata;
split_grid->AllToAll(d,tmpdata,alldata);
}
}
}
vectorizeFromLexOrdArray(alldata,split);
}
template<class Vobj>
void Grid_split(Lattice<Vobj> &full,Lattice<Vobj> & split)
{
int nvector = full._grid->_Nprocessors / split._grid->_Nprocessors;
std::vector<Lattice<Vobj> > full_v(nvector,full._grid);
for(int n=0;n<nvector;n++){
full_v[n] = full;
}
Grid_split(full_v,split);
}
template<class Vobj>
void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
typedef typename Vobj::scalar_object Sobj;
int full_vecs = full.size();
assert(full_vecs>=1);
GridBase * full_grid = full[0]._grid;
GridBase *split_grid = split._grid;
int ndim = full_grid->_ndimension;
int full_nproc = full_grid->_Nprocessors;
int split_nproc =split_grid->_Nprocessors;
////////////////////////////////
// Checkerboard management
////////////////////////////////
int cb = full[0].checkerboard;
split.checkerboard = cb;
//////////////////////////////
// Checks
//////////////////////////////
assert(full_grid->_ndimension==split_grid->_ndimension);
for(int n=0;n<full_vecs;n++){
assert(full[n].checkerboard == cb);
for(int d=0;d<ndim;d++){
assert(full[n]._grid->_gdimensions[d]==split._grid->_gdimensions[d]);
assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]);
}
}
int nvector =full_nproc/split_nproc;
assert(nvector*split_nproc==full_nproc);
assert(nvector == full_vecs);
std::vector<int> ratio(ndim);
for(int d=0;d<ndim;d++){
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
int lsites = full_grid->lSites();
Integer sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
unvectorizeToLexOrdArray(alldata,split);
/////////////////////////////////////////////////////////////////
// Start from split grid and work towards full grid
/////////////////////////////////////////////////////////////////
std::vector<int> lcoor(ndim);
std::vector<int> rcoor(ndim);
int nvec = 1;
lsites = split_grid->lSites();
std::vector<int> ldims = split_grid->_ldimensions;
for(int d=ndim-1;d>=0;d--){
if ( ratio[d] != 1 ) {
if ( split_grid->_processors[d] > 1 ) {
tmpdata = alldata;
split_grid->AllToAll(d,tmpdata,alldata);
}
//////////////////////////////////////////
//Local volume for this dimension is expanded by ratio of processor extents
// Number of vectors is decreased by same factor
// Rearrange to lexico for bigger volume
//////////////////////////////////////////
auto rsites= lsites/ratio[d];
auto rdims = ldims; rdims[d]/=ratio[d];
for(int v=0;v<nvec;v++){
// rsite, rcoor --> smaller local volume
// lsite, lcoor --> bigger original (single node?) volume
// For loop over each site within smaller subvol
for(int rsite=0;rsite<rsites;rsite++){
Lexicographic::CoorFromIndex(rcoor, rsite, rdims);
int lsite;
for(int r=0;r<ratio[d];r++){
lcoor = rcoor; lcoor[d] += r*rdims[d];
Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites;
int rmul=nvec*rsites;
int vmul= rsites;
tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite];
}
}
}
nvec *= ratio[d];
ldims[d]=rdims[d];
lsites =rsites;
full_grid ->AllToAll(d,tmpdata,alldata);
}
}
lsites = full_grid->lSites();
for(int v=0;v<nvector;v++){
parallel_for(int site=0;site<lsites;site++){
scalardata[site] = alldata[v*lsites+site];
}
assert(v<full.size());
vectorizeFromLexOrdArray(scalardata,full[v]);
}
}
}
#endif
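A hedged round-trip sketch of the two routines above (grid and field names are assumptions; both grids must agree on the global dimensions, as the asserts check):

  int nvector = FullGrid->_Nprocessors / SplitGrid->_Nprocessors;
  LatticeFermion full_field (FullGrid);    // lives on the parent grid
  LatticeFermion split_field(SplitGrid);   // lives on the sub-divided grid
  Grid_split(full_field,split_field);      // replicate one field across the split grids
  std::vector<LatticeFermion> full_v(nvector,FullGrid);
  Grid_unsplit(full_v,split_field);        // gather nvector fields back to the parent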
View File
@ -84,10 +84,6 @@ namespace QCD {
stream << "GRID_";
stream << ScidacWordMnemonic<stype>();
// std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl;
// std::cout << " Spin N/S/V/M : " << _SpinN <<" "<<_SpinScalar <<"/"<<_SpinVector <<"/"<<_SpinMatrix<<std::endl;
// std::cout << " Colour N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl;
if ( _LorentzVector ) stream << "_LorentzVector"<<_LorentzN;
if ( _LorentzMatrix ) stream << "_LorentzMatrix"<<_LorentzN;
@ -182,7 +178,7 @@ class GridLimeReader : public BinaryIO {
/////////////////////////////////////////////
// Open the file
/////////////////////////////////////////////
void open(std::string &_filename)
void open(const std::string &_filename)
{
filename= _filename;
File = fopen(filename.c_str(), "r");
@ -210,19 +206,33 @@ class GridLimeReader : public BinaryIO {
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl;
uint64_t file_bytes =limeReaderBytes(LimeR);
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <<std::endl;
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
// std::cout << "R sizeof(sobj)= " <<sizeof(sobj)<<std::endl;
// std::cout << "R Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "R Payload expected " <<PayloadSize<<std::endl;
// std::cout << "R file size " <<file_bytes <<std::endl;
assert(PayloadSize == file_bytes);// Must match or user error
off_t offset= ftell(File);
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
/////////////////////////////////////////////
// Insist checksum is next record
/////////////////////////////////////////////
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name);
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
/////////////////////////////////////////////
// Verify checksums
@ -242,11 +252,19 @@ class GridLimeReader : public BinaryIO {
// should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl;
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
XmlReader RD(&xmlc[0],"");
read(RD,object_name,object);
return;
@ -261,13 +279,14 @@ class GridLimeWriter : public BinaryIO {
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
// FIXME: collective calls or not ?
// : must know if I am the I/O boss
///////////////////////////////////////////////////
FILE *File;
LimeWriter *LimeW;
std::string filename;
void open(std::string &_filename) {
void open(const std::string &_filename) {
filename= _filename;
File = fopen(filename.c_str(), "w");
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
@ -302,14 +321,18 @@ class GridLimeWriter : public BinaryIO {
write(WR,object_name,object);
xmlstring = WR.XmlString();
}
// std::cout << "WriteLimeObject" << record_name <<std::endl;
uint64_t nbytes = xmlstring.size();
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
int err;
LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL);
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
assert(h!= NULL);
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h);
// std::cout << " File offset is now"<<ftell(File) << std::endl;
}
////////////////////////////////////////////
// Write a generic lattice field and csum
@ -326,6 +349,11 @@ class GridLimeWriter : public BinaryIO {
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
////////////////////////////////////////////////////////////////////
// NB: FILE and iostream are jointly writing disjoint sequences in the
// the same file through different file handles (integer units).
@ -340,6 +368,7 @@ class GridLimeWriter : public BinaryIO {
// v) Continue writing scidac record.
////////////////////////////////////////////////////////////////////
off_t offset = ftell(File);
// std::cout << " Writing to offset "<<offset << std::endl;
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
@ -354,7 +383,7 @@ class GridLimeWriter : public BinaryIO {
checksum.suma= streama.str();
checksum.sumb= streamb.str();
std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
writeLimeObject(0,1,checksum,std::string("scidacChecksum" ),std::string(SCIDAC_CHECKSUM));
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
}
};
@ -374,8 +403,6 @@ class ScidacWriter : public GridLimeWriter {
template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
{
typedef typename vobj::scalar_object sobj;
uint64_t nbytes;
GridBase * grid = field._grid;
////////////////////////////////////////
@ -397,6 +424,66 @@ class ScidacWriter : public GridLimeWriter {
}
};
class ScidacReader : public GridLimeReader {
public:
template<class SerialisableUserFile>
void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
////////////////////////////////////////////////
// Read generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord)
{
typedef typename vobj::scalar_object sobj;
GridBase * grid = field._grid;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
}
void skipPastBinaryRecord(void) {
std::string rec_name(ILDG_BINARY_DATA);
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
return;
}
}
}
void skipPastObjectRecord(std::string rec_name) {
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
return;
}
}
}
void skipScidacFieldRecord() {
skipPastObjectRecord(std::string(GRID_FORMAT));
skipPastObjectRecord(std::string(SCIDAC_RECORD_XML));
skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML));
skipPastBinaryRecord();
}
};
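A brief usage sketch pairing the new ScidacReader with the existing writer (file name is illustrative; the usual open()/close() pair on these classes is assumed):

  emptyUserRecord record;
  LatticeGaugeField field(grid);           // grid: a GridCartesian* set up elsewhere
  ScidacWriter WR;
  WR.open(std::string("field.scidac"));
  WR.writeScidacFieldRecord(field,record);
  WR.close();
  ScidacReader RD;
  RD.open(std::string("field.scidac"));
  RD.readScidacFieldRecord(field,record);
  RD.close();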
class IldgWriter : public ScidacWriter {
public:
@ -425,8 +512,6 @@ class IldgWriter : public ScidacWriter {
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
uint64_t nbytes;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
View File
@ -64,6 +64,11 @@ namespace Grid {
// file compatibility, so it should be correct to assume the undocumented but de facto file structure.
/////////////////////////////////////////////////////////////////////////////////
struct emptyUserRecord : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy);
emptyUserRecord() { dummy=0; };
};
////////////////////////
// Scidac private file xml
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
View File
@ -85,6 +85,9 @@ namespace Grid {
nd=4;
dimension.resize(4);
boundary.resize(4);
scidac_checksuma=0;
scidac_checksumb=0;
checksum=0;
}
};
@ -104,6 +107,7 @@ namespace Grid {
header.nd = nd;
header.dimension.resize(nd);
header.boundary.resize(nd);
header.data_start = 0;
for(int d=0;d<nd;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
View File
@ -0,0 +1,100 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/AbstractEOFAFermion.h
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H
#define GRID_QCD_ABSTRACT_EOFA_FERMION_H
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
namespace Grid {
namespace QCD {
// DJM: Abstract base class for EOFA fermion types.
// Defines layout of additional EOFA-specific parameters and operators.
// Use to construct EOFA pseudofermion actions that are agnostic to
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
// pseudofermion action with non-EOFA fermion type.
template<class Impl>
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
RealD mq1;
RealD mq2;
RealD mq3;
RealD shift;
int pm;
RealD alpha; // Mobius scale
RealD k; // EOFA normalization constant
virtual void Instantiatable(void) = 0;
// EOFA-specific operations
// Force user to implement in derived classes
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
// Implement derivatives in base class:
// for EOFA both DWF and Mobius just need d(Dw)/dU
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDeriv(mat, U, V, dag);
};
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivOE(mat, U, V, dag);
};
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
this->DhopDerivEO(mat, U, V, dag);
};
// Recompute 5D coefficients for different value of shift constant
// (needed for heatbath loop over poles)
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
// Constructors
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
{
int Ls = this->Ls;
this->alpha = _b + _c;
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
};
};
}}
#endif
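The normalization constant assembled in the constructor above, written out as a formula (a direct transcription of the code, with \alpha = b + c):

  k \;=\; \frac{\alpha\,(m_{q3}-m_{q2})\,(\alpha+1)^{2L_s}}
               {\bigl[(\alpha+1)^{L_s} + m_{q2}(\alpha-1)^{L_s}\bigr]\,
                \bigl[(\alpha+1)^{L_s} + m_{q3}(\alpha-1)^{L_s}\bigr]}\, .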
View File
@ -77,7 +77,6 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi
}
}
template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
{
this->Report();
@ -119,7 +118,6 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyZeroCounters(void)
MooeeInvTime=0;
}
template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
View File
@ -179,9 +179,9 @@ namespace Grid {
double MooeeInvTime;
protected:
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
virtual void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
};
}
View File
@ -0,0 +1,438 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, 1.0, 0.0, p)
{
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
Approx::zolotarev_free(zdata);
}
/***************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
Din = zero;
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
}
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
// This is just the identity for DWF
template<class Impl>
void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
/*****************************************************************************************************/
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftp;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5D(psi, chi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
int pm = this->pm;
RealD shift = this->shift;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
Coeff_t shiftp(0.0), shiftm(0.0);
if(shift != 0.0){
if(pm == 1){ shiftp = shift*(mq3-mq2); }
else{ shiftm = -shift*(mq3-mq2); }
}
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
#if(0)
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
for(int i=0; i<diag.size(); ++i){
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
}
for(int i=0; i<upper.size(); ++i){
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
}
for(int i=0; i<lower.size(); ++i){
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
}
#endif
this->M5Ddag(psi, chi, chi, lower, diag, upper);
}
// half checkerboard operations
template<class Impl>
void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dm;
lower[0] = this->dp;
this->M5D(psi, psi, chi, lower, diag, upper);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] = this->dp;
lower[0] = this->dm;
this->M5Ddag(psi, psi, chi, lower, diag, upper);
}
/****************************************************************************************/
//Zolo
template<class Impl>
void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
{
int Ls = this->Ls;
int pm = this->pm;
RealD mq1 = this->mq1;
RealD mq2 = this->mq2;
RealD mq3 = this->mq3;
RealD shift = this->shift;
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
this->bs.resize(Ls);
this->cs.resize(Ls);
this->aee.resize(Ls);
this->aeo.resize(Ls);
this->bee.resize(Ls);
this->beo.resize(Ls);
this->cee.resize(Ls);
this->ceo.resize(Ls);
for(int i=0; i<Ls; ++i){
this->bee[i] = 4.0 - this->M5 + 1.0;
this->cee[i] = 1.0;
}
for(int i=0; i<Ls; ++i){
this->aee[i] = this->cee[i];
this->bs[i] = this->beo[i] = 1.0;
this->cs[i] = this->ceo[i] = 0.0;
}
//////////////////////////////////////////
// EOFA shift terms
//////////////////////////////////////////
if(pm == 1){
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
this->dm = mq1*this->cee[Ls-1];
} else if(this->pm == -1) {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
} else {
this->dp = mq1*this->cee[0];
this->dm = mq1*this->cee[Ls-1];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
this->dee.resize(Ls+1);
this->lee.resize(Ls);
this->leem.resize(Ls);
this->uee.resize(Ls);
this->ueem.resize(Ls);
for(int i=0; i<Ls; ++i){
if(i < Ls-1){
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
this->leem[i] = this->dm/this->bee[i];
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
this->dee[i] = this->bee[i];
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
this->ueem[i] = this->dp / this->bee[0];
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
} else {
this->lee[i] = 0.0;
this->leem[i] = 0.0;
this->uee[i] = 0.0;
this->ueem[i] = 0.0;
}
}
{
Coeff_t delta_d = 1.0 / this->bee[0];
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
}
int inv = 1;
this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
}
// Recompute Cayley-form coefficients for different shift
template<class Impl>
void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
Approx::zolotarev_free(zdata); // free zdata, matching the constructor
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
int Ls = this->Ls;
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
#if(0)
std::cout << GridLogMessage << "Pplus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pplus(s,ss) << "\t";
}
std::cout << std::endl;
}
std::cout << GridLogMessage << "Pminus:" << std::endl;
for(int s=0; s<Ls; ++s){
for(int ss=0; ss<Ls; ++ss){
std::cout << Pminus(s,ss) << "\t";
}
std::cout << std::endl;
}
#endif
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
FermOpTemplateInstantiate(DomainWallEOFAFermion);
GparityFermOpTemplateInstantiate(DomainWallEOFAFermion);
}}
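For reference, the matrices assembled in MooeeInternalCompute, written out for general Ls (a transcription of the loops above; dp and dm are the EOFA corner entries):

  P_+ = \begin{pmatrix}
          b_0   &        &            & d_p       \\
          -c_1  & b_1    &            &           \\
                & \ddots & \ddots     &           \\
                &        & -c_{L_s-1} & b_{L_s-1}
        \end{pmatrix},
  \qquad
  P_- = \begin{pmatrix}
          b_0  & -c_0   &        &            \\
               & b_1    & \ddots &            \\
               &        & \ddots & -c_{L_s-2} \\
          d_m  &        &        & b_{L_s-1}
        \end{pmatrix}.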
View File
@ -0,0 +1,115 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.h
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
#define GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
// for red-black preconditioned Shamir EOFA
Coeff_t dm;
Coeff_t dp;
virtual void Instantiatable(void) {};
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
virtual void RefreshShiftCoefficients(RealD new_shift);
// Constructors
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, const ImplParams& p=ImplParams());
protected:
void SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c);
};
}}
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
template void DomainWallEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void DomainWallEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void DomainWallEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
template void DomainWallEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
#undef DOMAIN_WALL_EOFA_DPERP_DENSE
#define DOMAIN_WALL_EOFA_DPERP_CACHE
#undef DOMAIN_WALL_EOFA_DPERP_LINALG
#define DOMAIN_WALL_EOFA_DPERP_VEC
#endif
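// Typical construction (a hedged sketch; the SpaceTimeGrid helpers are the
// usual Grid idiom for the 5d/4d grid pattern, and the parameter values
// here are placeholders):
//
//   GridCartesian         *UGrid   = SpaceTimeGrid::makeFourDimGrid(
//       GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
//   GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
//   GridCartesian         *FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
//   GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
//
//   LatticeGaugeField U(UGrid);
//   DomainWallEOFAFermionR Deofa(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid,
//                                mq1, mq2, mq3, shift, pm, M5);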

View File

@ -0,0 +1,248 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus forwards
// Pplus backwards
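// In components, with the caller-supplied coefficient vectors, M5D computes
//
//   chi[s] = diag[s] * phi[s] + upper[s] * P_- psi[s+1] + lower[s] * P_+ psi[s-1]
//
// where the s-index wraps at the ends of the 5th dimension (s+1 -> 0 at
// s = Ls-1, and s-1 -> Ls-1 at s = 0), so the mass terms enter through the
// caller's choice of upper[Ls-1] and lower[0].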
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
int Ls = this->Ls;
GridBase* grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
int Ls = this->Ls;
GridBase* grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard=psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
chi[ss] = psi[ss]; // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // chi[Ls-1] = chi[Ls-1] - sum_{s<Ls-1} leem[s] P_- chi[s]
spProj5m(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // chi[s] = (1/dee[s]) chi[s] - (ueem[s]/dee[Ls]) P_+ chi[Ls-1]
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1;
}
spProj5m(tmp2, chi[ss+Ls-1]);
chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2;
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp1, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
}
}
this->MooeeInvTime += usecond();
}
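// In operator form the loops above apply the LDU solve
//
//   chi = U^{-1} U_m^{-1} D^{-1} L_m^{-1} (L')^{-1} psi
//
// i.e. forward substitution through the lower factors, then the diagonal
// scaling, then back substitution through the upper factors.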
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls+1);
std::vector<Coeff_t> leec(Ls);
std::vector<Coeff_t> ueemc(Ls);
std::vector<Coeff_t> leemc(Ls);
for(int s=0; s<ueec.size(); s++){
ueec[s] = conjugate(this->uee[s]);
deec[s] = conjugate(this->dee[s]);
leec[s] = conjugate(this->lee[s]);
ueemc[s] = conjugate(this->ueem[s]);
leemc[s] = conjugate(this->leem[s]);
}
deec[Ls] = conjugate(this->dee[Ls]);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
// Apply (U^{\prime})^{-dagger}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - ueec[s-1]*tmp1;
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp1;
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/deec[s])*chi[ss+s] - (leemc[s]/deec[Ls-1])*tmp1;
}
spProj5p(tmp2, chi[ss+Ls-1]);
chi[ss+Ls-1] = (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2;
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp1, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - leec[s]*tmp1;
}
}
this->MooeeInvTime += usecond();
}
#ifdef DOMAIN_WALL_EOFA_DPERP_CACHE
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -0,0 +1,159 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->dp;
Pminus(Ls-1,0) = this->dm;
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -0,0 +1,168 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus forwards
// Pplus backwards
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
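// For reference, the axpby_ssp_p{plus,minus} conventions used above are
// (a sketch of the semantics as used here):
//
//   axpby_ssp_pminus(chi, a, phi, b, psi, s, sp) : chi[s] = a*phi[s] + b*P_- psi[sp]
//   axpby_ssp_pplus (chi, a, phi, b, psi, s, sp) : chi[s] = a*phi[s] + b*P_+ psi[sp]
//
// so each call writes one s-slice of chi from fixed source slices.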
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // chi[Ls-1] = chi[Ls-1] - sum_{s<Ls-1} leem[s] P_- chi[s]
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
}
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
}
}
#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
#endif
}}

View File

@ -0,0 +1,605 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Vectorised (5th-dimension SIMD) versions of routines, applying the
* dense Ls x Ls matrices per site
*/
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0;o<LLs;o++){ // outer
for(int i=0;i<nsimd;i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
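// Lane interleaving: with s = o + i*LLs and ss = o*nsimd + i, SIMD lane i of
// packed element o holds the coefficient for 5th-dim slice s = o + i*LLs.
// E.g. for Ls = 8, nsimd = 4 (so LLs = 2):
//   u[0] lanes hold upper[0], upper[2], upper[4], upper[6]
//   u[1] lanes hold upper[1], upper[3], upper[5], upper[7]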
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5m(hp, psi[ss+vp]);
spProj5p(hm, psi[ss+vm]);
if (vp <= v){ rotate(hp, hp, 1); }
if (vm >= v){ rotate(hm, hm, nsimd-1); }
hp = 0.5*hp;
hm = 0.5*hm;
spRecon5m(fp, hp);
spRecon5p(fm, hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] + u[v]*fp;
chi[ss+v] = chi[ss+v] + l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v==LLs-1) ? 0 : v+1;
int vm = (v==0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
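// The s +/- 1 neighbour lives in the adjacent SIMD lane once the local
// index wraps: vp <= v (resp. vm >= v) flags the wrap, and tRotate<2>
// (one complex element = two reals) shifts the lanes by one complex,
// while tRotate<2*Nsimd-2> shifts by nsimd-1 complexes the other way.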
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd> > u(LLs);
Vector<iSinglet<Simd> > l(LLs);
Vector<iSinglet<Simd> > d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5p(hp, psi[ss+vp]);
spProj5m(hm, psi[ss+vm]);
if(vp <= v){ rotate(hp, hp, 1); }
if(vm >= v){ rotate(hm, hm, nsimd-1); }
hp = hp*0.5;
hm = hm*0.5;
spRecon5p(fp, hp);
spRecon5m(fm, hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
#ifdef AVX512
#include<simd/Intel512common.h>
#include<simd/Intel512avx.h>
#include<simd/Intel512single.h>
#endif
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
int s = s2 + l*LLs;
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // ~1100 us each,
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // timed by commenting each line out
}}
}}
{
int lex = s1 + LLs*site;
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
int lex = s2 + LLs*site;
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t) &psi[lex];
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
if((s2+l)==0) {
asm(
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
VMULMEM(0,%1,BCAST8,Chi_22)
VMULMEM(0,%1,BCAST9,Chi_30)
VMULMEM(0,%1,BCAST10,Chi_31)
VMULMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm(
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(Simd::scalar_type);
}
}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
exit(-1);
};
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;
Vector<iSinglet<Simd> > *_Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if(inv && dag){
_Matp = &this->MatpInvDag;
_Matm = &this->MatmInvDag;
}
if(inv && (!dag)){
_Matp = &this->MatpInv;
_Matm = &this->MatmInv;
}
if(!inv){
MooeeInternalCompute(dag, inv, Matp, Matm);
_Matp = &Matp;
_Matm = &Matm;
}
assert(_Matp->size() == Ls*LLs);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
if(switcheroo<Coeff_t>::iscomplex()){
parallel_for(auto site=0; site<vol; site++){
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
} else {
parallel_for(auto site=0; site<vol; site++){
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
}
this->MooeeInvTime += usecond();
}
#ifdef DOMAIN_WALL_EOFA_DPERP_VEC
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF);
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH);
template void DomainWallEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void DomainWallEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -38,6 +38,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
// - ContinuedFractionFermion5D.cc
// - WilsonFermion.cc
// - WilsonKernels.cc
// - DomainWallEOFAFermion.cc
// - MobiusEOFAFermion.cc
//
// The explicit instantiation is only avoidable if we move this source to headers and end up with include/parse/recompile
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
@ -55,8 +57,9 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
#include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
@ -113,6 +116,14 @@ typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR;
typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF;
typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD;
typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL;
typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH;
typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF;
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
@ -121,6 +132,14 @@ typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR;
typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF;
typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD;
typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL;
typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH;
typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
@ -138,6 +157,14 @@ typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
typedef DomainWallEOFAFermion<DomainWallVec5dImplR> DomainWallEOFAFermionVec5dR;
typedef DomainWallEOFAFermion<DomainWallVec5dImplF> DomainWallEOFAFermionVec5dF;
typedef DomainWallEOFAFermion<DomainWallVec5dImplD> DomainWallEOFAFermionVec5dD;
typedef DomainWallEOFAFermion<DomainWallVec5dImplRL> DomainWallEOFAFermionVec5dRL;
typedef DomainWallEOFAFermion<DomainWallVec5dImplFH> DomainWallEOFAFermionVec5dFH;
typedef DomainWallEOFAFermion<DomainWallVec5dImplDF> DomainWallEOFAFermionVec5dDF;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
@ -146,6 +173,14 @@ typedef MobiusFermion<DomainWallVec5dImplRL> MobiusFermionVec5dRL;
typedef MobiusFermion<DomainWallVec5dImplFH> MobiusFermionVec5dFH;
typedef MobiusFermion<DomainWallVec5dImplDF> MobiusFermionVec5dDF;
typedef MobiusEOFAFermion<DomainWallVec5dImplR> MobiusEOFAFermionVec5dR;
typedef MobiusEOFAFermion<DomainWallVec5dImplF> MobiusEOFAFermionVec5dF;
typedef MobiusEOFAFermion<DomainWallVec5dImplD> MobiusEOFAFermionVec5dD;
typedef MobiusEOFAFermion<DomainWallVec5dImplRL> MobiusEOFAFermionVec5dRL;
typedef MobiusEOFAFermion<DomainWallVec5dImplFH> MobiusEOFAFermionVec5dFH;
typedef MobiusEOFAFermion<DomainWallVec5dImplDF> MobiusEOFAFermionVec5dDF;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
@ -206,6 +241,14 @@ typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR;
typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF;
typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD;
typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL;
typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH;
typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF;
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
@ -222,6 +265,14 @@ typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR;
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF;
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD;
typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL;
typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH;
typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF;
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;

View File

@ -0,0 +1,502 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3,
RealD _shift, int _pm, RealD _M5,
RealD _b, RealD _c, const ImplParams &p) :
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
_shift, _pm, _M5, _b, _c, p)
{
int Ls = this->Ls;
RealD eps = 1.0;
Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
assert(zdata->n == this->Ls);
std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
",c=" << _c << ") with Ls=" << Ls << std::endl;
this->SetCoefficientsTanh(zdata, _b, _c);
std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
",pm=" << _pm << ")" << std::endl;
Approx::zolotarev_free(zdata);
if(_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
Mooee_shift.resize(Ls, 0.0);
MooeeInv_shift_lc.resize(Ls, 0.0);
MooeeInv_shift_norm.resize(Ls, 0.0);
MooeeInvDag_shift_lc.resize(Ls, 0.0);
MooeeInvDag_shift_norm.resize(Ls, 0.0);
}
}
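// Typical construction (illustrative sketch; the grids follow the usual
// 5d/4d pattern and the parameter values here are placeholders):
//
//   MobiusEOFAFermionR Deofa(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid,
//                            mq1, mq2, mq3, shift, pm, M5, b, c);
//
// With shift == 0.0 the shift coefficient tables are zeroed; any nonzero
// shift triggers SetCoefficientsPrecondShiftOps() below.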
/****************************************************************
* Additional EOFA operators only called outside the inverter.
* Since speed is not essential, simple axpby-style
* implementations should be fine.
***************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
{
int Ls = this->Ls;
RealD alpha = this->alpha;
Din = zero;
if((sign == 1) && (dag == 0)) { // \Omega_{+}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
}
} else if((sign == -1) && (dag == 0)) { // \Omega_{-}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
}
} else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
}
} else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
for(int sp=0; sp<Ls; ++sp){
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
}
}
}
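// In components the four cases above implement (alpha = this->alpha)
//
//   (Omega_+ psi)[s]     = 2 (1-alpha)^{Ls-s-1} / (1+alpha)^{Ls-s} psi[0]
//   (Omega_- psi)[s]     = 2 (1-alpha)^{s}      / (1+alpha)^{s+1}  psi[0]
//   (Omega_+^dag psi)[0] = sum_s 2 (1-alpha)^{Ls-s-1} / (1+alpha)^{Ls-s} psi[s]
//   (Omega_-^dag psi)[0] = sum_s 2 (1-alpha)^{s}      / (1+alpha)^{s+1}  psi[s]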
// This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
// It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
template<class Impl>
void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD b = 0.5 * ( 1.0 + this->alpha );
RealD c = 0.5 * ( 1.0 - this->alpha );
RealD mq1 = this->mq1;
for(int s=0; s<Ls; ++s){
if(s == 0) {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
} else if(s == (Ls-1)) {
axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
} else {
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
}
}
}
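// With b = (1+alpha)/2 and c = (1-alpha)/2 as above, this is the tridiagonal
// action
//
//   chi[s] = b psi[s] - c P_- psi[s+1] - c P_+ psi[s-1]
//
// with the wrap-around couplings weighted by +mq1*c: the (0,Ls-1) P_+ term
// and the (Ls-1,0) P_- term carry the mass.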
template<class Impl>
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
RealD m = this->mq1;
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD DtInv_p(0.0), DtInv_m(0.0);
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
FermionField tmp(this->FermionGrid());
for(int s=0; s<Ls; ++s){
for(int sp=0; sp<Ls; ++sp){
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
if(sp == 0){
axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
} else {
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
}
}}
chi = tmp; // copy the accumulated result into the output field
}
/*****************************************************************************************************/
template<class Impl>
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->Meooe5D(psi, Din);
this->DW(Din, chi, DaggerNo);
axpby(chi, 1.0, 1.0, chi, psi);
this->M5D(psi, chi);
return(norm2(chi));
}
template<class Impl>
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
FermionField Din(psi._grid);
this->DW(psi, Din, DaggerYes);
this->MeooeDag5D(Din, chi);
this->M5Ddag(psi, chi);
axpby(chi, 1.0, 1.0, chi, psi);
return(norm2(chi));
}
/********************************************************************
* Performance critical fermion operators called inside the inverter
********************************************************************/
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
}
// half checkerboard operations
template<class Impl>
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of Mooee
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
upper[s] = -this->cee[s];
lower[s] = -this->cee[s];
}
upper[Ls-1] *= -this->mq1;
lower[0] *= -this->mq1;
// no shift term
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
{
int Ls = this->Ls;
// coefficients of MooeeDag
std::vector<Coeff_t> diag = this->bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int s=0; s<Ls; s++){
if(s==0) {
upper[s] = -this->cee[s+1];
lower[s] = this->mq1*this->cee[Ls-1];
} else if(s==(Ls-1)) {
upper[s] = this->mq1*this->cee[0];
lower[s] = -this->cee[s-1];
} else {
upper[s] = -this->cee[s+1];
lower[s] = -this->cee[s-1];
}
}
// no shift term
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
// fused M + shift operation
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
}
/****************************************************************************************/
// Computes coefficients for applying Cayley preconditioned shift operators
// (Mooee + \Delta) --> Mooee_shift
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
// For the latter two cases, the operation takes the form
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
template<class Impl>
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
{
int Ls = this->Ls;
int pm = this->pm;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
RealD shift = this->shift;
// Initialize
Mooee_shift.resize(Ls);
MooeeInv_shift_lc.resize(Ls);
MooeeInv_shift_norm.resize(Ls);
MooeeInvDag_shift_lc.resize(Ls);
MooeeInvDag_shift_norm.resize(Ls);
// Construct Mooee_shift
int idx(0);
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
idx = (pm == 1) ? (s) : (Ls-1-s);
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
}
// Tridiagonal solve for MooeeInvDag_shift_lc
{
Coeff_t m(0.0);
std::vector<Coeff_t> d = Mooee_shift;
std::vector<Coeff_t> u(Ls,0.0);
std::vector<Coeff_t> y(Ls,0.0);
std::vector<Coeff_t> q(Ls,0.0);
if(pm == 1){ u[0] = 1.0; }
else{ u[Ls-1] = 1.0; }
// Tridiagonal matrix algorithm + Sherman-Morrison formula
//
// We solve
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
// where Mooee' is the tridiagonal part of Mooee_{+}, and
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
// so that the outer-product u \otimes v gives the (0,Ls-1)
// entry of Mooee_{+}.
//
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
// and then construct the solution to the original system
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
if(pm == 1){
for(int s=1; s<Ls; ++s){
m = -this->cee[s] / this->bee[s-1];
d[s] -= m*d[s-1];
u[s] -= m*u[s-1];
}
}
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
for(int s=Ls-2; s>=0; --s){
if(pm == 1){
y[s] = d[s] / this->bee[s];
q[s] = u[s] / this->bee[s];
} else {
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
}
}
// Construct MooeeInvDag_shift_lc
for(int s=0; s<Ls; ++s){
if(pm == 1){
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
} else {
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
}
}
// Compute remaining coefficients
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
for(int s=0; s<Ls; ++s){
// MooeeInv_shift_lc
if(pm == 1){ MooeeInv_shift_lc[s] = std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s); }
else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); }
// MooeeInv_shift_norm
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N;
// MooeeInvDag_shift_norm
if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) /
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
}
}
}
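#if 0
// Illustrative sketch (not part of the build): the scalar analogue of the
// bidiagonal + Sherman-Morrison solve above, for an upper bidiagonal T with
// diagonal b[s] and superdiagonal c[s], matching the structure of the
// pm == -1 branch. All names here are local to the sketch, not Grid identifiers.
static std::vector<double> BidiagShermanMorrisonSolve(const std::vector<double>& b,
  const std::vector<double>& c, const std::vector<double>& u,
  const std::vector<double>& v, const std::vector<double>& d)
{
  int Ls = b.size();
  std::vector<double> y(Ls), q(Ls), x(Ls);
  // Back substitution for T y = d and T q = u
  y[Ls-1] = d[Ls-1] / b[Ls-1];
  q[Ls-1] = u[Ls-1] / b[Ls-1];
  for(int s=Ls-2; s>=0; --s){
    y[s] = ( d[s] - c[s]*y[s+1] ) / b[s];
    q[s] = ( u[s] - c[s]*q[s+1] ) / b[s];
  }
  // Sherman-Morrison: ( T + u v^T )^{-1} d = y - <v,y> / ( 1 + <v,q> ) q
  double vy(0.0), vq(0.0);
  for(int s=0; s<Ls; ++s){ vy += v[s]*y[s]; vq += v[s]*q[s]; }
  for(int s=0; s<Ls; ++s){ x[s] = y[s] - vy/(1.0+vq)*q[s]; }
  return x;
}
#endif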
// Recompute coefficients for a different value of the shift constant
template<class Impl>
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
{
this->shift = new_shift;
if(new_shift != 0.0){
SetCoefficientsPrecondShiftOps();
} else {
int Ls = this->Ls;
Mooee_shift.resize(Ls,0.0);
MooeeInv_shift_lc.resize(Ls,0.0);
MooeeInv_shift_norm.resize(Ls,0.0);
MooeeInvDag_shift_lc.resize(Ls,0.0);
MooeeInvDag_shift_norm.resize(Ls,0.0);
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
int Ls = this->Ls;
GridBase* grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if(LLs == Ls){ return; } // Not vectorised in 5th direction
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0; s<Ls; s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = this->mq1*this->cee[0];
Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1];
if(this->shift != 0.0){
RealD c = 0.5 * this->alpha;
RealD d = 0.5;
RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) );
if(this->pm == 1) {
for(int s=0; s<Ls; ++s){
Pplus(s,Ls-1) += N * std::pow(-1.0,s) * std::pow(c-d,s) / std::pow(c+d,Ls+s+1);
}
} else {
for(int s=0; s<Ls; ++s){
Pminus(s,0) += N * std::pow(-1.0,s+1) * std::pow(c-d,Ls-1-s) / std::pow(c+d,2*Ls-s);
}
}
}
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
if(inv) {
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd = Simd::Nsimd();
Matp.resize(Ls*LLs);
Matm.resize(Ls*LLs);
for(int s2=0; s2<Ls; s2++){
for(int s1=0; s1<LLs; s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
scalar_type *sp = (scalar_type*) &Vp;
scalar_type *sm = (scalar_type*) &Vm;
for(int l=0; l<Nsimd; l++){
if(switcheroo<Coeff_t>::iscomplex()) {
sp[l] = PplusMat (l*istride+s1*ostride,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
} else {
// if real
scalar_type tmp;
tmp = PplusMat (l*istride+s1*ostride,s2);
sp[l] = scalar_type(tmp.real(),tmp.real());
tmp = PminusMat(l*istride+s1*ostride,s2);
sm[l] = scalar_type(tmp.real(),tmp.real());
}
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}}
}
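// Layout note: with the 5th dimension vectorised, logical coordinate
// s = l*LLs + s1 lives in SIMD lane l of vector word s1, so lane l of
// Matp[LLs*s2+s1] holds the matrix element (l*LLs+s1, s2) (istride = LLs,
// ostride = 1). For real Coeff_t the value is duplicated into both halves of
// the complex SIMD slot so that the complex multiply kernels can be reused.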
FermOpTemplateInstantiate(MobiusEOFAFermion);
GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
}}

View File

@@ -0,0 +1,133 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.h
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_MOBIUS_EOFA_FERMION_H
#define GRID_QCD_MOBIUS_EOFA_FERMION_H
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
namespace Grid {
namespace QCD {
template<class Impl>
class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Shift operator coefficients for red-black preconditioned Mobius EOFA
std::vector<Coeff_t> Mooee_shift;
std::vector<Coeff_t> MooeeInv_shift_lc;
std::vector<Coeff_t> MooeeInv_shift_norm;
std::vector<Coeff_t> MooeeInvDag_shift_lc;
std::vector<Coeff_t> MooeeInvDag_shift_norm;
virtual void Instantiatable(void) {};
// EOFA-specific operations
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
virtual void Dtilde (const FermionField& in, FermionField& out);
virtual void DtildeInv (const FermionField& in, FermionField& out);
// override multiply
virtual RealD M (const FermionField& in, FermionField& out);
virtual RealD Mdag (const FermionField& in, FermionField& out);
// half checkerboard operations
virtual void Mooee (const FermionField& in, FermionField& out);
virtual void MooeeDag (const FermionField& in, FermionField& out);
virtual void MooeeInv (const FermionField& in, FermionField& out);
virtual void MooeeInv_shift (const FermionField& in, FermionField& out);
virtual void MooeeInvDag (const FermionField& in, FermionField& out);
virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out);
virtual void M5D (const FermionField& psi, FermionField& chi);
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs);
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs);
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
virtual void RefreshShiftCoefficients(RealD new_shift);
// Constructors
MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
protected:
void SetCoefficientsPrecondShiftOps(void);
};
}}
#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\
template void MobiusEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void MobiusEOFAFermion<A>::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
template void MobiusEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
template void MobiusEOFAFermion<A>::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
template void MobiusEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInv_shift(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi); \
template void MobiusEOFAFermion<A>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi);
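// Compile-time selection of the D_perp implementation: _CACHE picks the
// site-loop kernels (MobiusEOFAFermioncache.cc) for a scalar 5th dimension
// and _VEC the SIMD kernels (MobiusEOFAFermionvec.cc) for a vectorised one;
// the Eigen-based _DENSE and axpby_ssp-based _LINALG variants are retained
// as reference implementations.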
#undef MOBIUS_EOFA_DPERP_DENSE
#define MOBIUS_EOFA_DPERP_CACHE
#undef MOBIUS_EOFA_DPERP_LINALG
#define MOBIUS_EOFA_DPERP_VEC
#endif

View File

@@ -0,0 +1,429 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
}
}
this->M5Dtime += usecond();
}
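// The shift contribution is rank one in s: every slice s picks up
// shift_coeffs[s] * P_+ psi[Ls-1] (pm == +1) or shift_coeffs[s] * P_- psi[0]
// (pm == -1), i.e. one dense column appended to the tridiagonal stencil.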
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
chi[ss+Ls-1] = zero; // slice Ls-1 accumulates shift terms below before its own stencil pass
auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
if(s==0) {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
else{ spProj5m(tmp, psi._odata[ss+s]); }
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (L^{\prime})^{-1}
chi[ss] = psi[ss]; // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // chi[Ls-1] = psi[Ls-1] - sum_{s<Ls-1} leem[s] P_- chi[s]
spProj5m(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // chi[s] = chi[s]/dee[s] - (ueem[s]/dee[Ls-1]) P_+ chi[Ls-1]
spProj5p(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
}
}
this->MooeeInvTime += usecond();
}
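// The four loops above are the standard LDU solve
//   chi = U^{-1} U_m^{-1} D^{-1} L_m^{-1} (L')^{-1} psi
// for the tridiagonal-plus-corner Mooee: forward substitution in lee, fold-in
// of the last s-slice via leem, rescale by 1/dee with the ueem corner
// correction, then back substitution in uee.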
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
chi[ss] = psi[ss]; // chi[0]=psi[0]
tmp2 = MooeeInv_shift_lc[0]*psi[ss];
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // chi[Ls-1] = psi[Ls-1] - sum_{s<Ls-1} leem[s] P_- chi[s]
spProj5m(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // chi[s] = chi[s]/dee[s] - (ueem[s]/dee[Ls-1]) P_+ chi[Ls-1]
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
}
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
// Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
spProj5m(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
}
}
this->MooeeInvTime += usecond();
}
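// This realises the header formula for the shifted inverse: the unshifted
// MooeeInv solve proceeds as above while tmp2 accumulates
// sum_j MooeeInv_shift_lc[j] * psi[j] on the fly; its P_{pm} projection
// (tmp2_spProj) is then added slice by slice with weight MooeeInv_shift_norm[s].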
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (U^{\prime})^{-dag}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
chi[ss] = psi[ss];
tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
spProj5p(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
}
}
this->MooeeInvTime += usecond();
}
#ifdef MOBIUS_EOFA_DPERP_CACHE
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
}}

View File

@@ -0,0 +1,184 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
int pm = this->pm;
RealD shift = this->shift;
RealD alpha = this->alpha;
RealD k = this->k;
RealD mq1 = this->mq1;
chi.checkerboard = psi.checkerboard;
assert(Ls==LLs);
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = this->bee[s];
Pminus(s,s) = this->bee[s];
}
for(int s=0; s<Ls-1; s++){
Pminus(s,s+1) = -this->cee[s];
}
for(int s=0; s<Ls-1; s++){
Pplus(s+1,s) = -this->cee[s+1];
}
Pplus (0,Ls-1) = mq1*this->cee[0];
Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
if(shift != 0.0){
Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
for(int s=0; s<Ls; ++s){
if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
}
}
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
if(inv){
PplusMat = Pplus.inverse();
PminusMat = Pminus.inverse();
} else {
PplusMat = Pplus;
PminusMat = Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
// For the non-vectorised s-direction this is simple
for(auto site=0; site<vol; site++){
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){
spProj5p(SitePplus,psi[lex2]);
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
}
if(PminusMat(s1,s2) != 0.0){
spProj5m(SitePminus, psi[lex2]);
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
}
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
}
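// The apply uses the chiral decomposition
//   chi_{s1} = 0.5 * sum_{s2} [ PplusMat(s1,s2)*(1+g5) + PminusMat(s1,s2)*(1-g5) ] psi_{s2}
// with the trailing *0.5 restoring the 1/2 of P_{+-} = (1 +- g5)/2, which the
// spProj/accumRecon pair on its own does not include.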
#ifdef MOBIUS_EOFA_DPERP_DENSE
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@@ -0,0 +1,290 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus forwards
// Pplus backwards
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
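// Here axpby_ssp_pminus(chi, a, phi, b, psi, s, sp) performs
//   chi[s] = a*phi[s] + b*P_- psi[sp]
// (axpby_ssp_pplus is the P_+ analogue), so each pass writes one s-slice of
// the same stencil as the cache implementation, at the price of one
// lattice-wide call per term.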
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(s==0) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
} else if (s==(Ls-1)) {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
} else {
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
}
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1); // recursion: chi[s] = psi[s] - lee[s-1] P_+ chi[s-1]
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // chi[Ls-1] = psi[Ls-1] - sum_{s<Ls-1} leem[s] P_- chi[s]
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[s] = chi[s] - uee[s] P_- chi[s+1]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (L^{\prime})^{-1}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1); // recursion: chi[s] = psi[s] - lee[s-1] P_+ chi[s-1]
axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // chi[Ls-1] = psi[Ls-1] - sum_{s<Ls-1} leem[s] P_- chi[s]
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
}
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
// Apply U^{-1} and add shift term
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[s] = chi[s] - uee[s] P_- chi[s+1]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[s] = chi[s] - conj(lee[s]) P_+ chi[s+1]
}
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
Coeff_t one(1.0);
Coeff_t czero(0.0);
chi.checkerboard = psi.checkerboard;
int Ls = this->Ls;
FermionField tmp(psi._grid);
// Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
for(int s=1; s<Ls; s++){
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
}
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
// Apply L^{-dagger} and add shift
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
for(int s=Ls-2; s>=0; s--){
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[s] = chi[s] - conj(lee[s]) P_+ chi[s+1]
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
}
}
#ifdef MOBIUS_EOFA_DPERP_LINALG
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
}}

View File

@@ -0,0 +1,983 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid {
namespace QCD {
/*
* Vectorised (SIMD in the 5th dimension) versions of routines
*/
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
{
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5m(hp, psi[ss+vp]);
spProj5p(hm, psi[ss+vm]);
if (vp <= v){ rotate(hp, hp, 1); }
if (vm >= v){ rotate(hm, hm, nsimd-1); }
hp = 0.5*hp;
hm = 0.5*hm;
spRecon5m(fp, hp);
spRecon5p(fm, hm);
chi[ss+v] = d[v]*phi[ss+v];
chi[ss+v] = chi[ss+v] + u[v]*fp;
chi[ss+v] = chi[ss+v] + l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
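// Lane bookkeeping: with the 5th dimension split as s = l*LLs + v across SIMD
// lanes l, the s+1 neighbour sits in the same lane of word v+1 except when v
// wraps (vp <= v), where tRotate<2> shifts the register by one complex number
// (two reals); the vm >= v case rotates the other way by 2*Nsimd()-2 reals
// for the s-1 neighbour.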
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
this->M5D(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well?
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
}
#else
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
Vector<iSinglet<Simd>> s(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
scalar_type* s_p = (scalar_type*) &s[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
s_p[ss] = shift_coeffs[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
assert(Nc == 3);
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
int vs = (this->pm == 1) ? LLs-1 : 0;
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0);
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1);
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2);
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0);
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1);
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2);
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(2)(0);
Simd hp_01 = psi[ss+vp]()(2)(1);
Simd hp_02 = psi[ss+vp]()(2)(2);
Simd hp_10 = psi[ss+vp]()(3)(0);
Simd hp_11 = psi[ss+vp]()(3)(1);
Simd hp_12 = psi[ss+vp]()(3)(2);
Simd hm_00 = psi[ss+vm]()(0)(0);
Simd hm_01 = psi[ss+vm]()(0)(1);
Simd hm_02 = psi[ss+vm]()(0)(2);
Simd hm_10 = psi[ss+vm]()(1)(0);
Simd hm_11 = psi[ss+vm]()(1)(1);
Simd hm_12 = psi[ss+vm]()(1)(2);
if(vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(this->pm == 1 && vs <= v){
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
if(this->pm == -1 && vs >= v){
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
}
// Can force these to real arithmetic and save 2x.
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
}
this->M5Dtime += usecond();
#endif
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
#if 0
alignas(64) SiteHalfSpinor hp;
alignas(64) SiteHalfSpinor hm;
alignas(64) SiteSpinor fp;
alignas(64) SiteSpinor fm;
for(int v=0; v<LLs; v++){
int vp = (v+1)%LLs;
int vm = (v+LLs-1)%LLs;
spProj5p(hp, psi[ss+vp]);
spProj5m(hm, psi[ss+vm]);
if(vp <= v){ rotate(hp, hp, 1); }
if(vm >= v){ rotate(hm, hm, nsimd-1); }
hp = hp*0.5;
hm = hm*0.5;
spRecon5p(fp, hp);
spRecon5m(fm, hm);
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
chi[ss+v] = chi[ss+v] +l[v]*fm;
}
#else
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
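// Note on the rotation counts above: each SIMD lane holds one complex number
// (two real scalars), so a rotation by one lane is tRotate<2> and a rotation by
// (Nsimd-1) lanes is tRotate<2*Simd::Nsimd()-2>. The vp <= v / vm >= v tests
// detect when the +-1 hop in the fifth dimension crosses a SIMD-lane boundary.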
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
#endif
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
this->M5Ddag(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well?
Coeff_t one(1.0);
int Ls = this->Ls;
for(int s=0; s<Ls; s++){
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
}
#else
GridBase* grid = psi._grid;
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
Vector<iSinglet<Simd>> u(LLs);
Vector<iSinglet<Simd>> l(LLs);
Vector<iSinglet<Simd>> d(LLs);
Vector<iSinglet<Simd>> s(LLs);
assert(Ls/LLs == nsimd);
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Type-pun the SIMD singlet vectors so that individual lanes can be addressed as scalars
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
scalar_type* l_p = (scalar_type*) &l[0];
scalar_type* d_p = (scalar_type*) &d[0];
scalar_type* s_p = (scalar_type*) &s[0];
for(int o=0; o<LLs; o++){ // outer
for(int i=0; i<nsimd; i++){ //inner
int s = o + i*LLs;
int ss = o*nsimd + i;
u_p[ss] = upper[s];
l_p[ss] = lower[s];
d_p[ss] = diag[s];
s_p[ss] = shift_coeffs[s];
}}
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // stride LLs: one 4d site's full fifth-dimension slice per iteration
int vs = (this->pm == 1) ? LLs-1 : 0;
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0);
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1);
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2);
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0);
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1);
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2);
for(int v=0; v<LLs; v++){
vprefetch(psi[ss+v+LLs]);
int vp = (v == LLs-1) ? 0 : v+1;
int vm = (v == 0 ) ? LLs-1 : v-1;
Simd hp_00 = psi[ss+vp]()(0)(0);
Simd hp_01 = psi[ss+vp]()(0)(1);
Simd hp_02 = psi[ss+vp]()(0)(2);
Simd hp_10 = psi[ss+vp]()(1)(0);
Simd hp_11 = psi[ss+vp]()(1)(1);
Simd hp_12 = psi[ss+vp]()(1)(2);
Simd hm_00 = psi[ss+vm]()(2)(0);
Simd hm_01 = psi[ss+vm]()(2)(1);
Simd hm_02 = psi[ss+vm]()(2)(2);
Simd hm_10 = psi[ss+vm]()(3)(0);
Simd hm_11 = psi[ss+vm]()(3)(1);
Simd hm_12 = psi[ss+vm]()(3)(2);
if (vp <= v){
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
}
if(this->pm == 1 && vs <= v){
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
}
if(vm >= v){
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
}
if(this->pm == -1 && vs >= v){
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
}
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
vstream(chi[ss+v]()(0)(0), p_00);
vstream(chi[ss+v]()(0)(1), p_01);
vstream(chi[ss+v]()(0)(2), p_02);
vstream(chi[ss+v]()(1)(0), p_10);
vstream(chi[ss+v]()(1)(1), p_11);
vstream(chi[ss+v]()(1)(2), p_12);
vstream(chi[ss+v]()(2)(0), p_20);
vstream(chi[ss+v]()(2)(1), p_21);
vstream(chi[ss+v]()(2)(2), p_22);
vstream(chi[ss+v]()(3)(0), p_30);
vstream(chi[ss+v]()(3)(1), p_31);
vstream(chi[ss+v]()(3)(2), p_32);
}
}
this->M5Dtime += usecond();
#endif
}
#ifdef AVX512
#include<simd/Intel512common.h>
#include<simd/Intel512avx.h>
#include<simd/Intel512single.h>
#endif
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
#ifndef AVX512
{
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
SiteHalfSpinor SiteChiM;
// Ls*Ls * 2 * 12 * vol flops
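// What follows is a dense Ls x Ls multiply in the fifth dimension,
//   chi(s1) = sum_s Matp(s,s1) P_+ psi(s) + Matm(s,s1) P_- psi(s)
// in the chiral basis used here, with vbroadcast pulling lane l of the
// vectorized psi out to all lanes before the accumulation.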
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
int s = s2 + l*LLs;
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
}}
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // ~1100 us per madd block,
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // measured by commenting each out
}}
}}
{
int lex = s1 + LLs*site;
for(int sp=0; sp<2; sp++){
for(int co=0; co<Nc; co++){
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
}}
}
}
}
#else
{
// pointers
// MASK_REGS;
#define Chi_00 %%zmm1
#define Chi_01 %%zmm2
#define Chi_02 %%zmm3
#define Chi_10 %%zmm4
#define Chi_11 %%zmm5
#define Chi_12 %%zmm6
#define Chi_20 %%zmm7
#define Chi_21 %%zmm8
#define Chi_22 %%zmm9
#define Chi_30 %%zmm10
#define Chi_31 %%zmm11
#define Chi_32 %%zmm12
#define BCAST0 %%zmm13
#define BCAST1 %%zmm14
#define BCAST2 %%zmm15
#define BCAST3 %%zmm16
#define BCAST4 %%zmm17
#define BCAST5 %%zmm18
#define BCAST6 %%zmm19
#define BCAST7 %%zmm20
#define BCAST8 %%zmm21
#define BCAST9 %%zmm22
#define BCAST10 %%zmm23
#define BCAST11 %%zmm24
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
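// Stepping one SIMD lane advances s by LLs, i.e. LLs*LLs entries in the
// (LLs*s + s1)-packed Matp/Matm arrays; incr converts that stride to bytes.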
for(int s1=0; s1<LLs; s1++){
for(int s2=0; s2<LLs; s2++){
int lex = s2 + LLs*site;
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
uint64_t a2 = (uint64_t) &psi[lex];
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
if((s2+l)==0) {
asm(
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
VBCASTCDUP(0,%2,BCAST0)
VBCASTCDUP(1,%2,BCAST1)
VBCASTCDUP(2,%2,BCAST2)
VBCASTCDUP(3,%2,BCAST3)
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
VMULMEM(0,%1,BCAST8,Chi_22)
VMULMEM(0,%1,BCAST9,Chi_30)
VMULMEM(0,%1,BCAST10,Chi_31)
VMULMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
} else {
asm(
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
: : "r" (a0), "r" (a1), "r" (a2) );
}
a0 = a0 + incr;
a1 = a1 + incr;
a2 = a2 + sizeof(Simd::scalar_type);
}
}
{
int lexa = s1+LLs*site;
asm (
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
}
}
}
#undef Chi_00
#undef Chi_01
#undef Chi_02
#undef Chi_10
#undef Chi_11
#undef Chi_12
#undef Chi_20
#undef Chi_21
#undef Chi_22
#undef Chi_30
#undef Chi_31
#undef Chi_32
#undef BCAST0
#undef BCAST1
#undef BCAST2
#undef BCAST3
#undef BCAST4
#undef BCAST5
#undef BCAST6
#undef BCAST7
#undef BCAST8
#undef BCAST9
#undef BCAST10
#undef BCAST11
#endif
};
// Z-mobius version
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
exit(-1);
};
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
int Ls = this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard = psi.checkerboard;
Vector<iSinglet<Simd>> Matp;
Vector<iSinglet<Simd>> Matm;
Vector<iSinglet<Simd>>* _Matp;
Vector<iSinglet<Simd>>* _Matm;
// MooeeInternalCompute(dag,inv,Matp,Matm);
if(inv && dag){
_Matp = &this->MatpInvDag;
_Matm = &this->MatmInvDag;
}
if(inv && (!dag)){
_Matp = &this->MatpInv;
_Matm = &this->MatmInv;
}
if(!inv){
MooeeInternalCompute(dag, inv, Matp, Matm);
_Matp = &Matp;
_Matm = &Matm;
}
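// For the inverse (inv != 0) the matrices are taken from members presumably
// precomputed elsewhere (MatpInv, MatmInv and their Dag variants); only the
// forward application rebuilds Matp/Matm on each call.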
assert(_Matp->size() == Ls*LLs);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
if(switcheroo<Coeff_t>::iscomplex()){
parallel_for(auto site=0; site<vol; site++){
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
} else {
parallel_for(auto site=0; site<vol; site++){
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
}
}
this->MooeeInvTime += usecond();
}
#ifdef MOBIUS_EOFA_DPERP_VEC
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH);
template void MobiusEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
template void MobiusEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
#endif
}}

View File

@ -0,0 +1,264 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/////////////////////////////////////////////////////////////////
// Implementation of exact one flavour algorithm (EOFA) //
// using fermion classes defined in: //
// Grid/qcd/action/fermion/DomainWallEOFAFermion.h (Shamir) //
// Grid/qcd/action/fermion/MobiusEOFAFermion.h (Mobius) //
// arXiv: 1403.1683, 1706.05843 //
/////////////////////////////////////////////////////////////////
#ifndef QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
#define QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
namespace Grid{
namespace QCD{
///////////////////////////////////////////////////////////////
// Exact one flavour implementation of DWF determinant ratio //
///////////////////////////////////////////////////////////////
template<class Impl>
class ExactOneFlavourRatioPseudoFermionAction : public Action<typename Impl::GaugeField>
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef OneFlavourRationalParams Params;
Params param;
MultiShiftFunction PowerNegHalf;
private:
bool use_heatbath_forecasting;
AbstractEOFAFermion<Impl>& Lop; // the basic left-handed (LH) operator
AbstractEOFAFermion<Impl>& Rop; // the basic right-handed (RH) operator
SchurRedBlackDiagMooeeSolve<FermionField> Solver;
FermionField Phi; // the pseudofermion field for this trajectory
public:
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), Solver(S),
Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
{
AlgRemez remez(param.lo, param.hi, param.precision);
// Remez rational approximation to x^(-1/2), used to build M^(-1/2)
std::cout << GridLogMessage << "Generating degree " << param.degree << " for x^(-1/2)" << std::endl;
remez.generateApprox(param.degree, 1, 2);
PowerNegHalf.Init(remez, param.tolerance, true);
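// PowerNegHalf now holds the poles beta_l and residues alpha_l of the
// partial-fraction form alpha_0 + sum_l alpha_l/(x + beta_l) ~= x^(-1/2),
// valid on the spectral window [param.lo, param.hi].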
};
virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; }
virtual std::string LogParameters() {
std::stringstream sstream;
sstream << GridLogMessage << "[" << action_name() << "] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "[" << action_name() << "] High :" << param.hi << std::endl;
sstream << GridLogMessage << "[" << action_name() << "] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "[" << action_name() << "] Tolerance :" << param.tolerance << std::endl;
sstream << GridLogMessage << "[" << action_name() << "] Degree :" << param.degree << std::endl;
sstream << GridLogMessage << "[" << action_name() << "] Precision :" << param.precision << std::endl;
return sstream.str();
}
// Spin projection
void spProj(const FermionField& in, FermionField& out, int sign, int Ls)
{
if(sign == 1){ for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(out, 0.0, in, 1.0, in, s, s); } }
else{ for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(out, 0.0, in, 1.0, in, s, s); } }
}
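// i.e. axpby_ssp_pplus(out, 0, in, 1, in, s, s) implements out_s = P_+ in_s,
// applying the chiral projector P_{+-} = (1 +- gamma_5)/2 independently on
// every fifth-dimension slice.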
// EOFA heatbath: see Eqn. (29) of arXiv:1706.05843
// We generate a Gaussian noise vector \eta, and then compute
// \Phi = M_{\rm EOFA}^{-1/2} * \eta
// using a rational approximation to the inverse square root
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
{
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField eta (Lop.FermionGrid());
FermionField CG_src (Lop.FermionGrid());
FermionField CG_soln (Lop.FermionGrid());
FermionField Forecast_src(Lop.FermionGrid());
std::vector<FermionField> tmp(2, Lop.FermionGrid());
// Use chronological inverter to forecast solutions across poles
std::vector<FermionField> prev_solns;
if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); }
ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast;
// Seed with Gaussian noise vector (var = 0.5)
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta);
eta = eta * scale;
printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
// \Phi = ( \alpha_{0} + \sum_{l=1}^{N_{p}} \alpha_{l} \gamma_{l} ) \eta
RealD N(PowerNegHalf.norm);
for(int k=0; k<param.degree; ++k){ N += PowerNegHalf.residues[k] / ( 1.0 + PowerNegHalf.poles[k] ); }
Phi = eta * N;
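// N = alpha_0 + sum_l alpha_l/(1 + beta_l) is the scalar prefactor of the
// identity part of the expansion; the pole-by-pole solves below supply the
// remaining terms.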
// LH terms:
// \Phi = \Phi + k \sum_{l=1}^{N_{p}} P_{-} \Omega_{-}^{\dagger} ( H(mf)
// - \gamma_{l} \Delta_{-}(mf,mb) P_{-} )^{-1} \Omega_{-} P_{-} \eta
RealD gamma_l(0.0);
spProj(eta, tmp[0], -1, Lop.Ls);
Lop.Omega(tmp[0], tmp[1], -1, 0);
G5R5(CG_src, tmp[1]);
tmp[1] = zero;
for(int k=0; k<param.degree; ++k){
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
Lop.RefreshShiftCoefficients(-gamma_l);
if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
Lop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Lop, Forecast_src, prev_solns);
Solver(Lop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = zero; // Just use zero as the initial guess
Solver(Lop, CG_src, CG_soln);
}
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved the Cayley-preconditioned system: transform back
tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
}
Lop.Omega(tmp[1], tmp[0], -1, 1);
spProj(tmp[0], tmp[1], -1, Lop.Ls);
Phi = Phi + tmp[1];
// RH terms:
// \Phi = \Phi - k \sum_{l=1}^{N_{p}} P_{+} \Omega_{+}^{\dagger} ( H(mb)
// + \gamma_{l} \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \eta
spProj(eta, tmp[0], 1, Rop.Ls);
Rop.Omega(tmp[0], tmp[1], 1, 0);
G5R5(CG_src, tmp[1]);
tmp[1] = zero;
if(use_heatbath_forecasting){ prev_solns.clear(); } // empirically, LH solns don't help for RH solves
for(int k=0; k<param.degree; ++k){
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
Rop.RefreshShiftCoefficients(-gamma_l*PowerNegHalf.poles[k]);
if(use_heatbath_forecasting){
Rop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Rop, Forecast_src, prev_solns);
Solver(Rop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = zero;
Solver(Rop, CG_src, CG_soln);
}
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved the Cayley-preconditioned system: transform back
tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
}
Rop.Omega(tmp[1], tmp[0], 1, 1);
spProj(tmp[0], tmp[1], 1, Rop.Ls);
Phi = Phi + tmp[1];
// Reset shift coefficients for energy and force evals
Lop.RefreshShiftCoefficients(0.0);
Rop.RefreshShiftCoefficients(-1.0);
};
// EOFA action: see Eqn. (10) of arXiv:1706.05843
virtual RealD S(const GaugeField& U)
{
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField spProj_Phi(Lop.FermionGrid());
std::vector<FermionField> tmp(2, Lop.FermionGrid());
// S = <\Phi|\Phi>
RealD action(norm2(Phi));
// LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
spProj(Phi, spProj_Phi, -1, Lop.Ls);
Lop.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
Solver(Lop, tmp[1], tmp[0]);
Lop.Dtilde(tmp[0], tmp[1]); // We actually solved the Cayley-preconditioned system: transform back
Lop.Omega(tmp[1], tmp[0], -1, 1);
action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi>
spProj(Phi, spProj_Phi, 1, Rop.Ls);
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
Solver(Rop, tmp[1], tmp[0]);
Rop.Dtilde(tmp[0], tmp[1]);
Rop.Omega(tmp[1], tmp[0], 1, 1);
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
return action;
};
// EOFA pseudofermion force: see Eqns. (34)-(36) of arXiv:1706.05843
virtual void deriv(const GaugeField& U, GaugeField& dSdU)
{
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField spProj_Phi (Lop.FermionGrid());
FermionField Omega_spProj_Phi(Lop.FermionGrid());
FermionField CG_src (Lop.FermionGrid());
FermionField Chi (Lop.FermionGrid());
FermionField g5_R5_Chi (Lop.FermionGrid());
GaugeField force(Lop.GaugeGrid());
// LH: dSdU = k \chi_{L}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{L}
// \chi_{L} = H(mf)^{-1} \Omega_{-} P_{-} \Phi
spProj(Phi, spProj_Phi, -1, Lop.Ls);
Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
G5R5(CG_src, Omega_spProj_Phi);
spProj_Phi = zero;
Solver(Lop, CG_src, spProj_Phi);
Lop.Dtilde(spProj_Phi, Chi);
G5R5(g5_R5_Chi, Chi);
Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
dSdU = Lop.k * force;
// RH: dSdU = dSdU - k \chi_{R}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{R}
// \chi_{R} = ( H(mb) - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \Phi
spProj(Phi, spProj_Phi, 1, Rop.Ls);
Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
G5R5(CG_src, Omega_spProj_Phi);
spProj_Phi = zero;
Solver(Rop, CG_src, spProj_Phi);
Rop.Dtilde(spProj_Phi, Chi);
G5R5(g5_R5_Chi, Chi);
Rop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
dSdU = dSdU - Rop.k * force;
};
};
}}
#endif

View File

@ -38,5 +38,6 @@ directory
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h>
#endif

View File

@ -202,6 +202,39 @@ class DomainWallFermionModule: public FermionOperatorModule<DomainWallFermion, F
};
class DomainWallEOFAFermionParameters : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DomainWallEOFAFermionParameters,
RealD, mq1,
RealD, mq2,
RealD, mq3,
RealD, shift,
int, pm,
RealD, M5,
unsigned int, Ls);
};
template <class FermionImpl >
class DomainWallEOFAFermionModule: public FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> {
typedef FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> FermBase;
using FermBase::FermBase; // for constructors
virtual unsigned int Ls(){
return this->Par_.Ls;
}
// acquire resource
virtual void initialize(){
auto GridMod = this->GridRefs[0];
auto GridMod5d = this->GridRefs[1];
typename FermionImpl::GaugeField U(GridMod->get_full());
this->FOPtr.reset(new DomainWallEOFAFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
*(GridMod5d->get_full()), *(GridMod5d->get_rb()),
this->Par_.mq1, this->Par_.mq2, this->Par_.mq3,
this->Par_.shift, this->Par_.pm, this->Par_.M5));
}
};
} // QCD
} // Grid

View File

@ -60,7 +60,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian
simd5.push_back(FourDimGrid->_simd_layout[d]);
mpi5.push_back(FourDimGrid->_processors[d]);
}
return new GridCartesian(latt5,simd5,mpi5);
return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid);
}
@ -68,18 +68,14 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC
{
int N4=FourDimGrid->_ndimension;
int cbd=1;
std::vector<int> latt5(1,Ls);
std::vector<int> simd5(1,1);
std::vector<int> mpi5(1,1);
std::vector<int> cb5(1,0);
for(int d=0;d<N4;d++){
latt5.push_back(FourDimGrid->_fdimensions[d]);
simd5.push_back(FourDimGrid->_simd_layout[d]);
mpi5.push_back(FourDimGrid->_processors[d]);
cb5.push_back( 1);
}
return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd);
GridCartesian *tmp = makeFiveDimGrid(Ls,FourDimGrid);
GridRedBlackCartesian *ret = new GridRedBlackCartesian(tmp,cb5,cbd);
delete tmp;
return ret;
}
@ -97,26 +93,24 @@ GridCartesian *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartes
simd5.push_back(1);
mpi5.push_back(FourDimGrid->_processors[d]);
}
return new GridCartesian(latt5,simd5,mpi5);
return new GridCartesian(latt5,simd5,mpi5,*FourDimGrid);
}
///////////////////////////////////////////////////
// This interface is inefficient: it builds a temporary
// full five-dimensional grid and then deletes it.
// Pass in the non-redblack four-dimensional grid.
///////////////////////////////////////////////////
GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int Ls,const GridCartesian *FourDimGrid)
{
int N4=FourDimGrid->_ndimension;
int nsimd = FourDimGrid->Nsimd();
int cbd=1;
std::vector<int> latt5(1,Ls);
std::vector<int> simd5(1,nsimd);
std::vector<int> mpi5(1,1);
std::vector<int> cb5(1,0);
for(int d=0;d<N4;d++){
latt5.push_back(FourDimGrid->_fdimensions[d]);
simd5.push_back(1);
mpi5.push_back(FourDimGrid->_processors[d]);
cb5.push_back(1);
}
return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd);
GridCartesian *tmp = makeFiveDimDWFGrid(Ls,FourDimGrid);
GridRedBlackCartesian *ret = new GridRedBlackCartesian(tmp,cb5,cbd);
delete tmp;
return ret;
}

View File

@ -175,7 +175,7 @@ class TensorIndexRecursion {
}
}
template<class vtype,int N> inline static
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0)),N> &arg, int i,int j)
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0,0)),N> &arg, int i,int j)
{
for(int ii=0;ii<N;ii++){
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i,j);
@ -191,7 +191,7 @@ class TensorIndexRecursion {
}}
}
template<class vtype,int N> inline static
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0)),N> &arg, int i,int j)
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0,0)),N> &arg, int i,int j)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){

View File

@ -243,6 +243,12 @@ void Grid_init(int *argc,char ***argv)
fname<<CartesianCommunicator::RankWorld();
fp=freopen(fname.str().c_str(),"w",stdout);
assert(fp!=(FILE *)NULL);
std::ostringstream ename;
ename<<"Grid.stderr.";
ename<<CartesianCommunicator::RankWorld();
fp=freopen(ename.str().c_str(),"w",stderr);
assert(fp!=(FILE *)NULL);
}
////////////////////////////////////

View File

@ -7,7 +7,7 @@ namespace Grid{
class Lexicographic {
public:
static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
static inline void CoorFromIndex (std::vector<int>& coor,int index,const std::vector<int> &dims){
int nd= dims.size();
coor.resize(nd);
for(int d=0;d<nd;d++){
@ -16,7 +16,7 @@ namespace Grid{
}
}
static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
static inline void IndexFromCoor (const std::vector<int>& coor,int &index,const std::vector<int> &dims){
int nd=dims.size();
int stride=1;
index=0;

View File

@ -48,7 +48,7 @@ int main(int argc, char ** argv) {
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(&Fine);
GridParallelRNG fRNG(&Fine);
// fRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

View File

@ -47,7 +47,7 @@ int main (int argc, char ** argv)
mask[0]=0;
GridCartesian Fine (latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1);
GridRedBlackCartesian RBFine(&Fine,mask,1);
GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

View File

@ -47,7 +47,7 @@ int main (int argc, char ** argv)
mask[0]=0;
GridCartesian Fine (latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1);
GridRedBlackCartesian RBFine(&Fine,mask,1);
GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

View File

@ -0,0 +1,239 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/core/Test_dwf_eofa_even_odd.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
const int Ls = 8;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
LatticeFermion src (FGrid); random(RNG5, src);
LatticeFermion phi (FGrid); random(RNG5, phi);
LatticeFermion chi (FGrid); random(RNG5, chi);
LatticeFermion result(FGrid); result = zero;
LatticeFermion ref (FGrid); ref = zero;
LatticeFermion tmp (FGrid); tmp = zero;
LatticeFermion err (FGrid); err = zero;
LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
// Only one direction non-zero (x); the rest are zeroed
Umu = zero;
for(int nn=0; nn<Nd; nn++){
random(RNG4, U[nn]);
if(nn>0){ U[nn] = zero; }
PokeIndex<LorentzIndex>(Umu, U[nn], nn);
}
RealD mq1 = 0.1;
RealD mq2 = 0.5;
RealD mq3 = 1.0;
RealD shift = 0.1234;
RealD M5 = 1.8;
int pm = 1;
DomainWallEOFAFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5);
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
LatticeFermion r_e (FrbGrid);
LatticeFermion r_o (FrbGrid);
LatticeFermion r_eo (FGrid);
LatticeFermion r_eeoo(FGrid);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing that Meo + Moe + Moo + Mee = Munprec " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
pickCheckerboard(Even, src_e, src);
pickCheckerboard(Odd, src_o, src);
Ddwf.Meooe(src_e, r_o); std::cout << GridLogMessage << "Applied Meo" << std::endl;
Ddwf.Meooe(src_o, r_e); std::cout << GridLogMessage << "Applied Moe" << std::endl;
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
Ddwf.Mooee(src_e, r_e); std::cout << GridLogMessage << "Applied Mee" << std::endl;
Ddwf.Mooee(src_o, r_o); std::cout << GridLogMessage << "Applied Moo" << std::endl;
setCheckerboard(r_eeoo, r_e);
setCheckerboard(r_eeoo, r_o);
r_eo = r_eo + r_eeoo;
Ddwf.M(src, ref);
// std::cout << GridLogMessage << r_eo << std::endl;
// std::cout << GridLogMessage << ref << std::endl;
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl;
LatticeComplex cerr(FGrid);
cerr = localInnerProduct(err,err);
// std::cout << GridLogMessage << cerr << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl;
std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
LatticeFermion chi_e (FrbGrid);
LatticeFermion chi_o (FrbGrid);
LatticeFermion dchi_e(FrbGrid);
LatticeFermion dchi_o(FrbGrid);
LatticeFermion phi_e (FrbGrid);
LatticeFermion phi_o (FrbGrid);
LatticeFermion dphi_e(FrbGrid);
LatticeFermion dphi_o(FrbGrid);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi);
Ddwf.Meooe (chi_e, dchi_o);
Ddwf.Meooe (chi_o, dchi_e);
Ddwf.MeooeDag(phi_e, dphi_o);
Ddwf.MeooeDag(phi_o, dphi_e);
ComplexD pDce = innerProduct(phi_e, dchi_e);
ComplexD pDco = innerProduct(phi_o, dchi_o);
ComplexD cDpe = innerProduct(chi_e, dphi_e);
ComplexD cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce-conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco-conj(cDpe) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv Mee = 1 " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
Ddwf.Mooee (chi_e, src_e);
Ddwf.MooeeInv(src_e, phi_e);
Ddwf.Mooee (chi_o, src_o);
Ddwf.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInvDag MeeDag = 1 " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
Ddwf.MooeeDag (chi_e, src_e);
Ddwf.MooeeInvDag(src_e, phi_e);
Ddwf.MooeeDag (chi_o, src_o);
Ddwf.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MpcDagMpc is Hermitian " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
random(RNG5, phi);
random(RNG5, chi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi);
RealD t1,t2;
SchurDiagMooeeOperator<DomainWallEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2);
HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2);
HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2);
HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2);
pDce = innerProduct(phi_e, dchi_e);
pDco = innerProduct(phi_o, dchi_o);
cDpe = innerProduct(chi_e, dphi_e);
cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDco-conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDce-conj(cDpe) << std::endl;
Grid_finalize();
}

View File

@ -47,7 +47,7 @@ int main (int argc, char ** argv)
vol = vol * latt_size[d];
}
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGRID(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGRID(&GRID);
LatticeComplexD one(&GRID);
LatticeComplexD zz(&GRID);

View File

@ -40,7 +40,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -84,7 +84,7 @@ int main(int argc, char **argv) {
double volume = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
GridCartesian Fine(latt_size, simd_layout, mpi_layout);
GridRedBlackCartesian rbFine(latt_size, simd_layout, mpi_layout);
GridRedBlackCartesian rbFine(&Fine);
GridParallelRNG FineRNG(&Fine);
GridSerialRNG SerialRNG;
GridSerialRNG SerialRNG1;

View File

@ -0,0 +1,241 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/core/Test_dwf_eofa_even_odd.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
const int Ls = 8;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
LatticeFermion src (FGrid); random(RNG5, src);
LatticeFermion phi (FGrid); random(RNG5, phi);
LatticeFermion chi (FGrid); random(RNG5, chi);
LatticeFermion result(FGrid); result = zero;
LatticeFermion ref (FGrid); ref = zero;
LatticeFermion tmp (FGrid); tmp = zero;
LatticeFermion err (FGrid); err = zero;
LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
// Only one direction non-zero (x); the rest are zeroed
Umu = zero;
for(int nn=0; nn<Nd; nn++){
random(RNG4, U[nn]);
if(nn>0){ U[nn] = zero; }
PokeIndex<LorentzIndex>(Umu, U[nn], nn);
}
RealD b = 2.5;
RealD c = 1.5;
RealD mq1 = 0.1;
RealD mq2 = 0.5;
RealD mq3 = 1.0;
RealD shift = 0.1234;
RealD M5 = 1.8;
int pm = 1;
MobiusEOFAFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5, b, c);
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
LatticeFermion r_e (FrbGrid);
LatticeFermion r_o (FrbGrid);
LatticeFermion r_eo (FGrid);
LatticeFermion r_eeoo(FGrid);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing that Meo + Moe + Moo + Mee = Munprec " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
pickCheckerboard(Even, src_e, src);
pickCheckerboard(Odd, src_o, src);
Ddwf.Meooe(src_e, r_o); std::cout << GridLogMessage << "Applied Meo" << std::endl;
Ddwf.Meooe(src_o, r_e); std::cout << GridLogMessage << "Applied Moe" << std::endl;
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
Ddwf.Mooee(src_e, r_e); std::cout << GridLogMessage << "Applied Mee" << std::endl;
Ddwf.Mooee(src_o, r_o); std::cout << GridLogMessage << "Applied Moo" << std::endl;
setCheckerboard(r_eeoo, r_e);
setCheckerboard(r_eeoo, r_o);
r_eo = r_eo + r_eeoo;
Ddwf.M(src, ref);
// std::cout << GridLogMessage << r_eo << std::endl;
// std::cout << GridLogMessage << ref << std::endl;
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl;
LatticeComplex cerr(FGrid);
cerr = localInnerProduct(err,err);
// std::cout << GridLogMessage << cerr << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl;
std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
LatticeFermion chi_e (FrbGrid);
LatticeFermion chi_o (FrbGrid);
LatticeFermion dchi_e(FrbGrid);
LatticeFermion dchi_o(FrbGrid);
LatticeFermion phi_e (FrbGrid);
LatticeFermion phi_o (FrbGrid);
LatticeFermion dphi_e(FrbGrid);
LatticeFermion dphi_o(FrbGrid);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi);
Ddwf.Meooe (chi_e, dchi_o);
Ddwf.Meooe (chi_o, dchi_e);
Ddwf.MeooeDag(phi_e, dphi_o);
Ddwf.MeooeDag(phi_o, dphi_e);
ComplexD pDce = innerProduct(phi_e, dchi_e);
ComplexD pDco = innerProduct(phi_o, dchi_o);
ComplexD cDpe = innerProduct(chi_e, dphi_e);
ComplexD cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce-conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco-conj(cDpe) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv Mee = 1 " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
Ddwf.Mooee (chi_e, src_e);
Ddwf.MooeeInv(src_e, phi_e);
Ddwf.Mooee (chi_o, src_o);
Ddwf.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInvDag MeeDag = 1 " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
Ddwf.MooeeDag (chi_e, src_e);
Ddwf.MooeeInvDag(src_e, phi_e);
Ddwf.MooeeDag (chi_o, src_o);
Ddwf.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MpcDagMpc is Hermitian " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
random(RNG5, phi);
random(RNG5, chi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi);
RealD t1,t2;
SchurDiagMooeeOperator<MobiusEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2);
HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2);
HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2);
HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2);
pDce = innerProduct(phi_e, dchi_e);
pDco = innerProduct(phi_o, dchi_o);
cDpe = innerProduct(chi_e, dphi_e);
cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDco-conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDce-conj(cDpe) << std::endl;
Grid_finalize();
}

View File

@ -40,7 +40,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -51,7 +51,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -52,7 +52,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -0,0 +1,102 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_heatbath_dwf_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//////////////////////////////////////////////////////////////////////////////////////////
// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and
// then uses this Phi to compute the action <Phi|Meofa|Phi>.
// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>.
//////////////////////////////////////////////////////////////////////////////////////////
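// Sketch of why: with Phi = Meofa^{-1/2}|eta>,
//   <Phi|Meofa|Phi> = <eta|Meofa^{-1/2} Meofa Meofa^{-1/2}|eta> = <eta|eta>,
// up to the accuracy of the rational approximation and the solver tolerance.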
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
// Parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Npoles = 12;
const RealD mf = 0.01;
const RealD mpv = 1.0;
const RealD M5 = 1.8;
int main(int argc, char** argv)
{
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl;
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
DomainWallEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5);
DomainWallEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5);
// Construct the action and test the heatbath (zero initial guess)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
// Construct the action and test the heatbath (forecasted initial guesses)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
return 0;
}

View File

@ -0,0 +1,108 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_heatbath_dwf_eofa_gparity.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//////////////////////////////////////////////////////////////////////////////////////////
// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and
// then uses this Phi to compute the action <Phi|Meofa|Phi>.
// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>.
//////////////////////////////////////////////////////////////////////////////////////////
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef GparityWilsonImplR FermionImplPolicy;
typedef GparityDomainWallEOFAFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
// Parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Npoles = 12;
const RealD mf = 0.01;
const RealD mpv = 1.0;
const RealD M5 = 1.8;
int main(int argc, char** argv)
{
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl;
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
// GparityDomainWallFermionR::ImplParams params;
FermionAction::ImplParams params;
FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, params);
FermionAction Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, params);
// Construct the action and test the heatbath (zero initial guess)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
// Construct the action and test the heatbath (forecasted initial guesses)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
return 0;
}

View File

@ -0,0 +1,104 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_heatbath_mobius_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//////////////////////////////////////////////////////////////////////////////////////////
// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and
// then uses this Phi to compute the action <Phi|Meofa|Phi>.
// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>.
//////////////////////////////////////////////////////////////////////////////////////////
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
// Parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Npoles = 12;
const RealD b = 2.5;
const RealD c = 1.5;
const RealD mf = 0.01;
const RealD mpv = 1.0;
const RealD M5 = 1.8;
int main(int argc, char** argv)
{
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl;
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
MobiusEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c);
MobiusEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c);
// Construct the action and test the heatbath (zero initial guess)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
// Construct the action and test the heatbath (forecasted initial guesses)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
return 0;
}

View File

@ -0,0 +1,109 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_heatbath_mobius_eofa_gparity.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//////////////////////////////////////////////////////////////////////////////////////////
// This program sets up the initial pseudofermion field |Phi> = Meofa^{-1/2}*|eta>, and
// then uses this Phi to compute the action <Phi|Meofa|Phi>.
// If all is working, one should find that <eta|eta> = <Phi|Meofa|Phi>.
//////////////////////////////////////////////////////////////////////////////////////////
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef GparityWilsonImplR FermionImplPolicy;
typedef GparityMobiusEOFAFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
// Parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Npoles = 12;
const RealD b = 2.5;
const RealD c = 1.5;
const RealD mf = 0.01;
const RealD mpv = 1.0;
const RealD M5 = 1.8;
int main(int argc, char** argv)
{
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is set up to use " << threads << " threads" << std::endl;
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: " << grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
FermionAction::ImplParams params;
FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c, params);
FermionAction Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c, params);
// Construct the action and test the heatbath (zero initial guess)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
// Construct the action and test the heatbath (forecasted initial guesses)
{
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, Npoles);
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
Meofa.refresh(Umu, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
}
return 0;
}

View File

@ -0,0 +1,206 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_reweight_dwf_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
// parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Nhits = 25;
const int max_iter = 5000;
const RealD mf = 0.1;
const RealD mb = 0.11;
const RealD M5 = 1.8;
const RealD stop_tol = 1.0e-12;
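// Delete-1 jackknife helpers: jack_mean(data, i) averages the data with sample i removed,
// and jack_std returns sqrt( (N-1)/N * sum_i (jack_mean_i - mean)^2 )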
RealD mean(const std::vector<RealD>& data)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ mean += data[i]; }
return mean/RealD(N);
}
RealD jack_mean(const std::vector<RealD>& data, int sample)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ if(i != sample){ mean += data[i]; } }
return mean/RealD(N-1);
}
RealD jack_std(const std::vector<RealD>& jacks, RealD mean)
{
int N = jacks.size();
RealD std(0.0);
for(int i=0; i<N; ++i){ std += std::pow(jacks[i]-mean, 2.0); }
return std::sqrt(RealD(N-1)/RealD(N)*std);
}
std::vector<RealD> jack_stats(const std::vector<RealD>& data)
{
int N = data.size();
std::vector<RealD> jack_samples(N);
std::vector<RealD> jack_stats(2);
jack_stats[0] = mean(data);
for(int i=0; i<N; i++){ jack_samples[i] = jack_mean(data,i); }
jack_stats[1] = jack_std(jack_samples, jack_stats[0]);
return jack_stats;
}
int main(int argc, char **argv)
{
Grid_init(&argc, &argv);
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: "
<< grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
// Initialize RHMC fermion operators
DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5);
DomainWallFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5);
SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> MdagM(Ddwf_f);
SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> VdagV(Ddwf_b);
// Degree 12 rational approximations to x^(1/4) and x^(-1/4)
double lo = 0.0001;
double hi = 95.0;
int precision = 64;
int degree = 12;
AlgRemez remez(lo, hi, precision);
std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl;
remez.generateApprox(degree, 1, 4);
MultiShiftFunction PowerQuarter(remez, stop_tol, false);
MultiShiftFunction PowerNegQuarter(remez, stop_tol, true);
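// The final boolean is assumed to select the inverse power:
// false -> x^(1/4) (PowerQuarter), true -> x^(-1/4) (PowerNegQuarter)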
// Stochastically estimate reweighting factor via RHMC
RealD scale = std::sqrt(0.5);
std::vector<RealD> rw_rhmc(Nhits);
ConjugateGradientMultiShift<LatticeFermion> msCG_V(max_iter, PowerQuarter);
ConjugateGradientMultiShift<LatticeFermion> msCG_M(max_iter, PowerNegQuarter);
std::cout.precision(12);
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
LatticeFermion Phi (Ddwf_f.FermionGrid());
LatticeFermion PhiOdd (Ddwf_f.FermionRedBlackGrid());
std::vector<LatticeFermion> tmp(2, Ddwf_f.FermionRedBlackGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
pickCheckerboard(Odd, PhiOdd, Phi);
// evaluate -log(rw)
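// tmp[0] = (VdagV)^{1/4} PhiOdd and tmp[1] = (MdagM)^{-1/4} tmp[0], so each hit gives
// -log(rw) = |(MdagM)^{-1/4} (VdagV)^{1/4} PhiOdd|^2 - |PhiOdd|^2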
msCG_V(VdagV, PhiOdd, tmp[0]);
msCG_M(MdagM, tmp[0], tmp[1]);
rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd);
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
// Initialize EOFA fermion operators
RealD shift_L = 0.0;
RealD shift_R = -1.0;
int pm = 1;
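// shift_L = 0 and shift_R = -1 select the left- and right-hand EOFA determinant factors
// (sign conventions assumed to follow the EOFA construction)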
DomainWallEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5);
DomainWallEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5);
MdagMLinearOperator<DomainWallEOFAFermionR, LatticeFermion> LdagL(Deofa_L);
MdagMLinearOperator<DomainWallEOFAFermionR, LatticeFermion> RdagR(Deofa_R);
// Stochastically estimate reweighting factor via EOFA
RealD k = Deofa_L.k;
std::vector<RealD> rw_eofa(Nhits);
ConjugateGradient<LatticeFermion> CG(stop_tol, max_iter);
SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
LatticeFermion Phi (Deofa_L.FermionGrid());
LatticeFermion spProj_Phi(Deofa_L.FermionGrid());
std::vector<LatticeFermion> tmp(2, Deofa_L.FermionGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
// evaluate -log(rw)
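// EOFA estimator: project Phi onto a single chirality (axpby_ssp_pminus/pplus apply P-/P+),
// rotate with Omega, solve the red-black preconditioned system, and accumulate
// -k*<Phi_-|...> from the LH term plus +k*<Phi_+|...> from the RH term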
// LH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_L.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_L, tmp[1], tmp[0]);
Deofa_L.Omega(tmp[0], tmp[1], -1, 1);
rw_eofa[hit] = -k*innerProduct(spProj_Phi,tmp[1]).real();
// RH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_R.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_R, tmp[1], tmp[0]);
Deofa_R.Omega(tmp[0], tmp[1], 1, 1);
rw_eofa[hit] += k*innerProduct(spProj_Phi,tmp[1]).real();
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- EOFA: Hit " << hit << ": rw = " << rw_eofa[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
std::vector<RealD> rhmc_result = jack_stats(rw_rhmc);
std::vector<RealD> eofa_result = jack_stats(rw_eofa);
std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl;
std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,209 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_reweight_dwf_eofa_gparity.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef typename GparityDomainWallFermionR::FermionField FermionField;
// parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Nhits = 10;
const int max_iter = 5000;
const RealD mf = 0.1;
const RealD mb = 0.11;
const RealD M5 = 1.8;
const RealD stop_tol = 1.0e-12;
RealD mean(const std::vector<RealD>& data)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ mean += data[i]; }
return mean/RealD(N);
}
RealD jack_mean(const std::vector<RealD>& data, int sample)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ if(i != sample){ mean += data[i]; } }
return mean/RealD(N-1);
}
RealD jack_std(const std::vector<RealD>& jacks, RealD mean)
{
int N = jacks.size();
RealD std(0.0);
for(int i=0; i<N; ++i){ std += std::pow(jacks[i]-mean, 2.0); }
return std::sqrt(RealD(N-1)/RealD(N)*std);
}
std::vector<RealD> jack_stats(const std::vector<RealD>& data)
{
int N = data.size();
std::vector<RealD> jack_samples(N);
std::vector<RealD> jack_stats(2);
jack_stats[0] = mean(data);
for(int i=0; i<N; i++){ jack_samples[i] = jack_mean(data,i); }
jack_stats[1] = jack_std(jack_samples, jack_stats[0]);
return jack_stats;
}
int main(int argc, char **argv)
{
Grid_init(&argc, &argv);
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: "
<< grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
// Initialize RHMC fermion operators
GparityDomainWallFermionR::ImplParams params;
GparityDomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, params);
GparityDomainWallFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, params);
SchurDiagMooeeOperator<GparityDomainWallFermionR, FermionField> MdagM(Ddwf_f);
SchurDiagMooeeOperator<GparityDomainWallFermionR, FermionField> VdagV(Ddwf_b);
// Degree 12 rational approximations to x^(1/4) and x^(-1/4)
double lo = 0.0001;
double hi = 95.0;
int precision = 64;
int degree = 12;
AlgRemez remez(lo, hi, precision);
std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl;
remez.generateApprox(degree, 1, 4);
MultiShiftFunction PowerQuarter(remez, stop_tol, false);
MultiShiftFunction PowerNegQuarter(remez, stop_tol, true);
// Stochastically estimate reweighting factor via RHMC
RealD scale = std::sqrt(0.5);
std::vector<RealD> rw_rhmc(Nhits);
ConjugateGradientMultiShift<FermionField> msCG_V(max_iter, PowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(max_iter, PowerNegQuarter);
std::cout.precision(12);
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
FermionField Phi (Ddwf_f.FermionGrid());
FermionField PhiOdd (Ddwf_f.FermionRedBlackGrid());
std::vector<FermionField> tmp(2, Ddwf_f.FermionRedBlackGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
pickCheckerboard(Odd, PhiOdd, Phi);
// evaluate -log(rw)
msCG_V(VdagV, PhiOdd, tmp[0]);
msCG_M(MdagM, tmp[0], tmp[1]);
rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd);
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
// Initialize EOFA fermion operators
RealD shift_L = 0.0;
RealD shift_R = -1.0;
int pm = 1;
GparityDomainWallEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, params);
GparityDomainWallEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, params);
MdagMLinearOperator<GparityDomainWallEOFAFermionR, FermionField> LdagL(Deofa_L);
MdagMLinearOperator<GparityDomainWallEOFAFermionR, FermionField> RdagR(Deofa_R);
// Stochastically estimate reweighting factor via EOFA
RealD k = Deofa_L.k;
std::vector<RealD> rw_eofa(Nhits);
ConjugateGradient<FermionField> CG(stop_tol, max_iter);
SchurRedBlackDiagMooeeSolve<FermionField> SchurSolver(CG);
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
FermionField Phi (Deofa_L.FermionGrid());
FermionField spProj_Phi(Deofa_L.FermionGrid());
std::vector<FermionField> tmp(2, Deofa_L.FermionGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
// evaluate -log(rw)
// LH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_L.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_L, tmp[1], tmp[0]);
Deofa_L.Omega(tmp[0], tmp[1], -1, 1);
rw_eofa[hit] = -k*innerProduct(spProj_Phi,tmp[1]).real();
// RH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_R.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_R, tmp[1], tmp[0]);
Deofa_R.Omega(tmp[0], tmp[1], 1, 1);
rw_eofa[hit] += k*innerProduct(spProj_Phi,tmp[1]).real();
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- EOFA: Hit " << hit << ": rw = " << rw_eofa[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
std::vector<RealD> rhmc_result = jack_stats(rw_rhmc);
std::vector<RealD> eofa_result = jack_stats(rw_eofa);
std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl;
std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,215 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_reweight_mobius_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
// parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Nhits = 10;
const int max_iter = 5000;
const RealD b = 2.5;
const RealD c = 1.5;
const RealD mf = 0.1;
const RealD mb = 0.11;
const RealD M5 = 1.8;
const RealD stop_tol = 1.0e-12;
RealD mean(const std::vector<RealD>& data)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ mean += data[i]; }
return mean/RealD(N);
}
RealD jack_mean(const std::vector<RealD>& data, int sample)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ if(i != sample){ mean += data[i]; } }
return mean/RealD(N-1);
}
RealD jack_std(const std::vector<RealD>& jacks, RealD mean)
{
int N = jacks.size();
RealD std(0.0);
for(int i=0; i<N; ++i){ std += std::pow(jacks[i]-mean, 2.0); }
return std::sqrt(RealD(N-1)/RealD(N)*std);
}
std::vector<RealD> jack_stats(const std::vector<RealD>& data)
{
int N = data.size();
std::vector<RealD> jack_samples(N);
std::vector<RealD> jack_stats(2);
jack_stats[0] = mean(data);
for(int i=0; i<N; i++){ jack_samples[i] = jack_mean(data,i); }
jack_stats[1] = jack_std(jack_samples, jack_stats[0]);
return jack_stats;
}
int main(int argc, char **argv)
{
Grid_init(&argc, &argv);
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: "
<< grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
// Initialize RHMC fermion operators
MobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c);
MobiusFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c);
SchurDiagMooeeOperator<MobiusFermionR, LatticeFermion> MdagM(Ddwf_f);
SchurDiagMooeeOperator<MobiusFermionR, LatticeFermion> VdagV(Ddwf_b);
// Degree 12 rational approximations to x^(1/4) and x^(-1/4)
double lo = 0.0001;
double hi = 95.0;
int precision = 64;
int degree = 12;
AlgRemez remez(lo, hi, precision);
std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl;
remez.generateApprox(degree, 1, 4);
MultiShiftFunction PowerQuarter(remez, stop_tol, false);
MultiShiftFunction PowerNegQuarter(remez, stop_tol, true);
// Stochastically estimate reweighting factor via RHMC
RealD scale = std::sqrt(0.5);
std::vector<RealD> rw_rhmc(Nhits);
ConjugateGradientMultiShift<LatticeFermion> msCG_V(max_iter, PowerQuarter);
ConjugateGradientMultiShift<LatticeFermion> msCG_M(max_iter, PowerNegQuarter);
std::cout.precision(12);
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
LatticeFermion Phi (Ddwf_f.FermionGrid());
LatticeFermion PhiOdd (Ddwf_f.FermionRedBlackGrid());
std::vector<LatticeFermion> tmp(2, Ddwf_f.FermionRedBlackGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
pickCheckerboard(Odd, PhiOdd, Phi);
// evaluate -log(rw)
msCG_V(VdagV, PhiOdd, tmp[0]);
msCG_M(MdagM, tmp[0], tmp[1]);
rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd);
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
// Initialize EOFA fermion operators
RealD shift_L = 0.0;
RealD shift_R = -1.0;
int pm = 1;
MobiusEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c);
MobiusEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c);
MdagMLinearOperator<MobiusEOFAFermionR, LatticeFermion> LdagL(Deofa_L);
MdagMLinearOperator<MobiusEOFAFermionR, LatticeFermion> RdagR(Deofa_R);
// Stochastically estimate reweighting factor via EOFA
RealD k = Deofa_L.k;
std::vector<RealD> rw_eofa(Nhits);
ConjugateGradient<LatticeFermion> CG(stop_tol, max_iter);
SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
// Compute -log(Z), where: ( RHMC det ratio ) = Z * ( EOFA det ratio )
RealD Z = std::pow(b+c+1.0,Ls) + mf*std::pow(b+c-1.0,Ls);
Z /= std::pow(b+c+1.0,Ls) + mb*std::pow(b+c-1.0,Ls);
Z = -12.0*grid_dim[0]*grid_dim[1]*grid_dim[2]*grid_dim[3]*std::log(Z);
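// Note that Z is reused: first it holds the ratio of determinant normalization factors,
// then its -12*V*log; the factor 12 is assumed to count the 4 spin x 3 colour
// degrees of freedom per site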
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
LatticeFermion Phi (Deofa_L.FermionGrid());
LatticeFermion spProj_Phi(Deofa_L.FermionGrid());
std::vector<LatticeFermion> tmp(2, Deofa_L.FermionGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
// evaluate -log(rw)
// LH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_L.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_L, tmp[1], tmp[0]);
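// Unlike the DWF case, the Mobius solve is followed by Dtilde
// (extra kernel normalization assumed for the Mobius EOFA action)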
Deofa_L.Dtilde(tmp[0], tmp[1]);
Deofa_L.Omega(tmp[1], tmp[0], -1, 1);
rw_eofa[hit] = Z - k*innerProduct(spProj_Phi,tmp[0]).real();
// RH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_R.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_R, tmp[1], tmp[0]);
Deofa_R.Dtilde(tmp[0], tmp[1]);
Deofa_R.Omega(tmp[1], tmp[0], 1, 1);
rw_eofa[hit] += k*innerProduct(spProj_Phi,tmp[0]).real();
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- EOFA: Hit " << hit << ": rw = " << rw_eofa[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
std::vector<RealD> rhmc_result = jack_stats(rw_rhmc);
std::vector<RealD> eofa_result = jack_stats(rw_eofa);
std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl;
std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,218 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_reweight_mobius_eofa_gparity.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef typename GparityDomainWallFermionR::FermionField FermionField;
// parameters for test
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
const int Ls = 8;
const int Nhits = 10;
const int max_iter = 5000;
const RealD b = 2.5;
const RealD c = 1.5;
const RealD mf = 0.1;
const RealD mb = 0.11;
const RealD M5 = 1.8;
const RealD stop_tol = 1.0e-12;
RealD mean(const std::vector<RealD>& data)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ mean += data[i]; }
return mean/RealD(N);
}
RealD jack_mean(const std::vector<RealD>& data, int sample)
{
int N = data.size();
RealD mean(0.0);
for(int i=0; i<N; ++i){ if(i != sample){ mean += data[i]; } }
return mean/RealD(N-1);
}
RealD jack_std(const std::vector<RealD>& jacks, RealD mean)
{
int N = jacks.size();
RealD std(0.0);
for(int i=0; i<N; ++i){ std += std::pow(jacks[i]-mean, 2.0); }
return std::sqrt(RealD(N-1)/RealD(N)*std);
}
std::vector<RealD> jack_stats(const std::vector<RealD>& data)
{
int N = data.size();
std::vector<RealD> jack_samples(N);
std::vector<RealD> jack_stats(2);
jack_stats[0] = mean(data);
for(int i=0; i<N; i++){ jack_samples[i] = jack_mean(data,i); }
jack_stats[1] = jack_std(jack_samples, jack_stats[0]);
return jack_stats;
}
int main(int argc, char **argv)
{
Grid_init(&argc, &argv);
// Initialize spacetime grid
std::cout << GridLogMessage << "Lattice dimensions: "
<< grid_dim << " Ls: " << Ls << std::endl;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Set up RNGs
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
// Random gauge field
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
// Initialize RHMC fermion operators
GparityDomainWallFermionR::ImplParams params;
GparityMobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c, params);
GparityMobiusFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5, b, c, params);
SchurDiagMooeeOperator<GparityMobiusFermionR, FermionField> MdagM(Ddwf_f);
SchurDiagMooeeOperator<GparityMobiusFermionR, FermionField> VdagV(Ddwf_b);
// Degree 12 rational approximations to x^(1/4) and x^(-1/4)
double lo = 0.0001;
double hi = 95.0;
int precision = 64;
int degree = 12;
AlgRemez remez(lo, hi, precision);
std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl;
remez.generateApprox(degree, 1, 4);
MultiShiftFunction PowerQuarter(remez, stop_tol, false);
MultiShiftFunction PowerNegQuarter(remez, stop_tol, true);
// Stochastically estimate reweighting factor via RHMC
RealD scale = std::sqrt(0.5);
std::vector<RealD> rw_rhmc(Nhits);
ConjugateGradientMultiShift<FermionField> msCG_V(max_iter, PowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(max_iter, PowerNegQuarter);
std::cout.precision(12);
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
FermionField Phi (Ddwf_f.FermionGrid());
FermionField PhiOdd (Ddwf_f.FermionRedBlackGrid());
std::vector<FermionField> tmp(2, Ddwf_f.FermionRedBlackGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
pickCheckerboard(Odd, PhiOdd, Phi);
// evaluate -log(rw)
msCG_V(VdagV, PhiOdd, tmp[0]);
msCG_M(MdagM, tmp[0], tmp[1]);
rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd);
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
// Initialize EOFA fermion operators
RealD shift_L = 0.0;
RealD shift_R = -1.0;
int pm = 1;
GparityMobiusEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5, b, c, params);
GparityMobiusEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5, b, c, params);
MdagMLinearOperator<GparityMobiusEOFAFermionR, FermionField> LdagL(Deofa_L);
MdagMLinearOperator<GparityMobiusEOFAFermionR, FermionField> RdagR(Deofa_R);
// Stochastically estimate reweighting factor via EOFA
RealD k = Deofa_L.k;
std::vector<RealD> rw_eofa(Nhits);
ConjugateGradient<FermionField> CG(stop_tol, max_iter);
SchurRedBlackDiagMooeeSolve<FermionField> SchurSolver(CG);
// Compute -log(Z), where: ( RHMC det ratio ) = Z * ( EOFA det ratio )
RealD Z = std::pow(b+c+1.0,Ls) + mf*std::pow(b+c-1.0,Ls);
Z /= std::pow(b+c+1.0,Ls) + mb*std::pow(b+c-1.0,Ls);
Z = -12.0*grid_dim[0]*grid_dim[1]*grid_dim[2]*grid_dim[3]*std::log(Z);
for(int hit=0; hit<Nhits; hit++){
// Gaussian source
FermionField Phi (Deofa_L.FermionGrid());
FermionField spProj_Phi(Deofa_L.FermionGrid());
std::vector<FermionField> tmp(2, Deofa_L.FermionGrid());
gaussian(RNG5, Phi);
Phi = Phi*scale;
// evaluate -log(rw)
// LH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_L.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_L, tmp[1], tmp[0]);
Deofa_L.Dtilde(tmp[0], tmp[1]);
Deofa_L.Omega(tmp[1], tmp[0], -1, 1);
rw_eofa[hit] = 2.0*Z - k*innerProduct(spProj_Phi,tmp[0]).real();
// RH term
for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
Deofa_R.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SchurSolver(Deofa_R, tmp[1], tmp[0]);
Deofa_R.Dtilde(tmp[0], tmp[1]);
Deofa_R.Omega(tmp[1], tmp[0], 1, 1);
rw_eofa[hit] += k*innerProduct(spProj_Phi,tmp[0]).real();
std::cout << std::endl << "==================================================" << std::endl;
std::cout << " --- EOFA: Hit " << hit << ": rw = " << rw_eofa[hit];
std::cout << std::endl << "==================================================" << std::endl << std::endl;
}
std::vector<RealD> rhmc_result = jack_stats(rw_rhmc);
std::vector<RealD> eofa_result = jack_stats(rw_eofa);
std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl;
std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,164 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/forces/Test_dwf_force_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char** argv)
{
Grid_init(&argc, &argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
const int Ls = 8;
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Want a different conf at every run
// First create an instance of an engine.
std::random_device rnd_device;
// Specify the engine and distribution.
std::mt19937 mersenne_engine(rnd_device());
std::uniform_int_distribution<int> dist(1, 100);
auto gen = std::bind(dist, mersenne_engine);
std::vector<int> seeds4(4);
generate(begin(seeds4), end(seeds4), gen);
//std::vector<int> seeds4({1,2,3,5});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
LatticeFermion phi (FGrid); gaussian(RNG5, phi);
LatticeFermion Mphi (FGrid);
LatticeFermion MphiPrime (FGrid);
LatticeGaugeField U(UGrid);
SU3::HotConfiguration(RNG4,U);
////////////////////////////////////
// Unmodified matrix element
////////////////////////////////////
RealD mf = 0.01;
RealD mb = 1.0;
RealD M5 = 1.8;
DomainWallEOFAFermionR Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5);
DomainWallEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5);
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
Meofa.refresh(U, RNG5);
RealD S = Meofa.S(U); // S = <Phi|Meofa|Phi>
// Get the derivative of the pseudofermion action with respect to "U"
LatticeGaugeField UdSdU(UGrid);
Meofa.deriv(U, UdSdU);
////////////////////////////////////
// Modify the gauge field a little
////////////////////////////////////
RealD dt = 0.0001;
LatticeColourMatrix mommu(UGrid);
LatticeColourMatrix forcemu(UGrid);
LatticeGaugeField mom(UGrid);
LatticeGaugeField Uprime(UGrid);
for(int mu=0; mu<Nd; mu++){
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom, mommu, mu);
// sixth-order Taylor approximation to exp(dt*mom)*U
parallel_for(auto i=mom.begin(); i<mom.end(); i++){
Uprime[i](mu) = U[i](mu) + mom[i](mu)*U[i](mu)*dt + mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt/2.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt/6.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt/24.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt/120.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt*dt/720.0);
}
}
/*Ddwf.ImportGauge(Uprime);
Ddwf.M (phi,MphiPrime);
ComplexD Sprime = innerProduct(MphiPrime ,MphiPrime);*/
RealD Sprime = Meofa.S(Uprime);
//////////////////////////////////////////////
// Use derivative to estimate dS
//////////////////////////////////////////////
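// With U' = exp(dt*mom)*U, the predicted change is
// dS = dt * sum_{x,mu} trace( mom_mu * 2*Ta(UdSdU_mu) ),
// to be compared against the directly evaluated Sprime - S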
LatticeComplex dS(UGrid);
dS = zero;
for(int mu=0; mu<Nd; mu++){
mommu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = Ta(mommu)*2.0;
PokeIndex<LorentzIndex>(UdSdU, mommu, mu);
}
for(int mu=0; mu<Nd; mu++){
forcemu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = PeekIndex<LorentzIndex>(mom, mu);
// Update PF action density
dS = dS + trace(mommu*forcemu)*dt;
}
ComplexD dSpred = sum(dS);
/*std::cout << GridLogMessage << " S " << S << std::endl;
std::cout << GridLogMessage << " Sprime " << Sprime << std::endl;
std::cout << GridLogMessage << "dS " << Sprime-S << std::endl;
std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/
printf("\nS = %1.15e\n", S);
printf("Sprime = %1.15e\n", Sprime);
printf("dS = %1.15e\n", Sprime - S);
printf("real(dS_predict) = %1.15e\n", dSpred.real());
printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag());
assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ;
std::cout << GridLogMessage << "Done" << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,169 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/forces/Test_dwf_gpforce_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef GparityWilsonImplR FermionImplPolicy;
typedef GparityDomainWallEOFAFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
int main (int argc, char** argv)
{
Grid_init(&argc, &argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
const int Ls = 8;
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Want a different conf at every run
// First create an instance of an engine.
std::random_device rnd_device;
// Specify the engine and distribution.
std::mt19937 mersenne_engine(rnd_device());
std::uniform_int_distribution<int> dist(1, 100);
auto gen = std::bind(dist, mersenne_engine);
std::vector<int> seeds4(4);
generate(begin(seeds4), end(seeds4), gen);
//std::vector<int> seeds4({1,2,3,5});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
FermionField phi (FGrid); gaussian(RNG5, phi);
FermionField Mphi (FGrid);
FermionField MphiPrime (FGrid);
LatticeGaugeField U(UGrid);
SU3::HotConfiguration(RNG4,U);
////////////////////////////////////
// Unmodified matrix element
////////////////////////////////////
RealD mf = 0.01;
RealD mb = 1.0;
RealD M5 = 1.8;
FermionAction::ImplParams params;
FermionAction Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, params);
FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, params);
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
Meofa.refresh(U, RNG5);
RealD S = Meofa.S(U); // S = <Phi|Meofa|Phi>
// Get the derivative of the pseudofermion action with respect to "U"
LatticeGaugeField UdSdU(UGrid);
Meofa.deriv(U, UdSdU);
////////////////////////////////////
// Modify the gauge field a little
////////////////////////////////////
RealD dt = 0.0001;
LatticeColourMatrix mommu(UGrid);
LatticeColourMatrix forcemu(UGrid);
LatticeGaugeField mom(UGrid);
LatticeGaugeField Uprime(UGrid);
for(int mu=0; mu<Nd; mu++){
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom, mommu, mu);
// sixth-order Taylor approximation to exp(dt*mom)*U
parallel_for(auto i=mom.begin(); i<mom.end(); i++){
Uprime[i](mu) = U[i](mu) + mom[i](mu)*U[i](mu)*dt + mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt/2.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt/6.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt/24.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt/120.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt*dt/720.0);
}
}
/*Ddwf.ImportGauge(Uprime);
Ddwf.M (phi,MphiPrime);
ComplexD Sprime = innerProduct(MphiPrime ,MphiPrime);*/
RealD Sprime = Meofa.S(Uprime);
//////////////////////////////////////////////
// Use derivative to estimate dS
//////////////////////////////////////////////
LatticeComplex dS(UGrid);
dS = zero;
for(int mu=0; mu<Nd; mu++){
mommu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = Ta(mommu)*2.0;
PokeIndex<LorentzIndex>(UdSdU, mommu, mu);
}
for(int mu=0; mu<Nd; mu++){
forcemu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = PeekIndex<LorentzIndex>(mom, mu);
// Update PF action density
dS = dS + trace(mommu*forcemu)*dt;
}
ComplexD dSpred = sum(dS);
/*std::cout << GridLogMessage << " S " << S << std::endl;
std::cout << GridLogMessage << " Sprime " << Sprime << std::endl;
std::cout << GridLogMessage << "dS " << Sprime-S << std::endl;
std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/
printf("\nS = %1.15e\n", S);
printf("Sprime = %1.15e\n", Sprime);
printf("dS = %1.15e\n", Sprime - S);
printf("real(dS_predict) = %1.15e\n", dSpred.real());
printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag());
assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ;
std::cout << GridLogMessage << "Done" << std::endl;
Grid_finalize();
}

View File

@ -42,7 +42,7 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -42,7 +42,7 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -0,0 +1,166 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/forces/Test_mobius_force_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char** argv)
{
Grid_init(&argc, &argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
const int Ls = 8;
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Want a different conf at every run
// First create an instance of an engine.
std::random_device rnd_device;
// Specify the engine and distribution.
std::mt19937 mersenne_engine(rnd_device());
std::uniform_int_distribution<int> dist(1, 100);
auto gen = std::bind(dist, mersenne_engine);
std::vector<int> seeds4(4);
generate(begin(seeds4), end(seeds4), gen);
//std::vector<int> seeds4({1,2,3,5});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
LatticeFermion phi (FGrid); gaussian(RNG5, phi);
LatticeFermion Mphi (FGrid);
LatticeFermion MphiPrime (FGrid);
LatticeGaugeField U(UGrid);
SU3::HotConfiguration(RNG4,U);
////////////////////////////////////
// Unmodified matrix element
////////////////////////////////////
RealD b = 2.5;
RealD c = 1.5;
RealD mf = 0.01;
RealD mb = 1.0;
RealD M5 = 1.8;
MobiusEOFAFermionR Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, b, c);
MobiusEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c);
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
Meofa.refresh(U, RNG5);
RealD S = Meofa.S(U); // pdag M p
// get the deriv of phidag M phi with respect to "U"
LatticeGaugeField UdSdU(UGrid);
Meofa.deriv(U, UdSdU);
////////////////////////////////////
// Modify the gauge field a little
////////////////////////////////////
RealD dt = 0.0001;
LatticeColourMatrix mommu(UGrid);
LatticeColourMatrix forcemu(UGrid);
LatticeGaugeField mom(UGrid);
LatticeGaugeField Uprime(UGrid);
for(int mu=0; mu<Nd; mu++){
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom, mommu, mu);
// sixth order exponential approx
parallel_for(auto i=mom.begin(); i<mom.end(); i++){
Uprime[i](mu) = U[i](mu) + mom[i](mu)*U[i](mu)*dt + mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt/2.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt/6.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt/24.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt/120.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt*dt/720.0);
}
}
/*Ddwf.ImportGauge(Uprime);
Ddwf.M (phi,MphiPrime);
ComplexD Sprime = innerProduct(MphiPrime ,MphiPrime);*/
RealD Sprime = Meofa.S(Uprime);
//////////////////////////////////////////////
// Use derivative to estimate dS
//////////////////////////////////////////////
LatticeComplex dS(UGrid);
dS = zero;
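// Project U dS/dU onto the traceless anti-hermitian algebra (the same Lie-algebra
// space the Gaussian momenta live in) before tracing it against mommu below.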
for(int mu=0; mu<Nd; mu++){
mommu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = Ta(mommu)*2.0;
PokeIndex<LorentzIndex>(UdSdU, mommu, mu);
}
for(int mu=0; mu<Nd; mu++){
forcemu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = PeekIndex<LorentzIndex>(mom, mu);
// Update PF action density
dS = dS + trace(mommu*forcemu)*dt;
}
ComplexD dSpred = sum(dS);
/*std::cout << GridLogMessage << " S " << S << std::endl;
std::cout << GridLogMessage << " Sprime " << Sprime << std::endl;
std::cout << GridLogMessage << "dS " << Sprime-S << std::endl;
std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/
printf("\nS = %1.15e\n", S);
printf("Sprime = %1.15e\n", Sprime);
printf("dS = %1.15e\n", Sprime - S);
printf("real(dS_predict) = %1.15e\n", dSpred.real());
printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag());
assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ;
std::cout << GridLogMessage << "Done" << std::endl;
Grid_finalize();
}
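This test and the G-parity variant that follows implement the same check: compare the finite difference of the action against the prediction from the analytic derivative. Schematically (a sketch of the test's logic, with \(P_\mu\) the Gaussian momentum and \(F_\mu\) the projected force):

\[
\delta S_{\rm pred} \;=\; dt \sum_{x,\mu} {\rm tr}\!\left[\, P_\mu(x)\, F_\mu(x) \,\right]
\;\approx\; S(U') - S(U),
\qquad
U'_\mu = e^{P_\mu dt}\, U_\mu,
\quad
F_\mu = 2\, {\rm Ta}\!\left( U_\mu \frac{\partial S}{\partial U_\mu} \right),
\]

with the exponential truncated at sixth order in \(dt\) and the assert only requiring the difference to stay below 1.0.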

View File

@ -0,0 +1,171 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/forces/Test_dwf_force_eofa.cc
Copyright (C) 2017
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef GparityWilsonImplR FermionImplPolicy;
typedef GparityMobiusEOFAFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
int main (int argc, char** argv)
{
Grid_init(&argc, &argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
const int Ls = 8;
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// Want a different conf at every run
// First create an instance of an engine.
std::random_device rnd_device;
// Specify the engine and distribution.
std::mt19937 mersenne_engine(rnd_device());
std::uniform_int_distribution<int> dist(1, 100);
auto gen = std::bind(dist, mersenne_engine);
std::vector<int> seeds4(4);
generate(begin(seeds4), end(seeds4), gen);
//std::vector<int> seeds4({1,2,3,5});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
FermionField phi (FGrid); gaussian(RNG5, phi);
FermionField Mphi (FGrid);
FermionField MphiPrime (FGrid);
LatticeGaugeField U(UGrid);
SU3::HotConfiguration(RNG4,U);
////////////////////////////////////
// Unmodified matrix element
////////////////////////////////////
RealD b = 2.5;
RealD c = 1.5;
RealD mf = 0.01;
RealD mb = 1.0;
RealD M5 = 1.8;
FermionAction::ImplParams params;
FermionAction Lop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, 0.0, -1, M5, b, c, params);
FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c, params);
OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12);
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
Meofa.refresh(U, RNG5);
RealD S = Meofa.S(U); // pdag M p
// get the deriv of phidag M phi with respect to "U"
LatticeGaugeField UdSdU(UGrid);
Meofa.deriv(U, UdSdU);
////////////////////////////////////
// Modify the gauge field a little
////////////////////////////////////
RealD dt = 0.0001;
LatticeColourMatrix mommu(UGrid);
LatticeColourMatrix forcemu(UGrid);
LatticeGaugeField mom(UGrid);
LatticeGaugeField Uprime(UGrid);
for(int mu=0; mu<Nd; mu++){
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom, mommu, mu);
// sixth order exponential approx
parallel_for(auto i=mom.begin(); i<mom.end(); i++){
Uprime[i](mu) = U[i](mu) + mom[i](mu)*U[i](mu)*dt + mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt/2.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt/6.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt/24.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt/120.0)
+ mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *mom[i](mu) *U[i](mu)*(dt*dt*dt*dt*dt*dt/720.0);
}
}
/*Ddwf.ImportGauge(Uprime);
Ddwf.M (phi,MphiPrime);
ComplexD Sprime = innerProduct(MphiPrime ,MphiPrime);*/
RealD Sprime = Meofa.S(Uprime);
//////////////////////////////////////////////
// Use derivative to estimate dS
//////////////////////////////////////////////
LatticeComplex dS(UGrid);
dS = zero;
for(int mu=0; mu<Nd; mu++){
mommu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = Ta(mommu)*2.0;
PokeIndex<LorentzIndex>(UdSdU, mommu, mu);
}
for(int mu=0; mu<Nd; mu++){
forcemu = PeekIndex<LorentzIndex>(UdSdU, mu);
mommu = PeekIndex<LorentzIndex>(mom, mu);
// Update PF action density
dS = dS + trace(mommu*forcemu)*dt;
}
ComplexD dSpred = sum(dS);
/*std::cout << GridLogMessage << " S " << S << std::endl;
std::cout << GridLogMessage << " Sprime " << Sprime << std::endl;
std::cout << GridLogMessage << "dS " << Sprime-S << std::endl;
std::cout << GridLogMessage << "predict dS " << dSpred << std::endl;*/
printf("\nS = %1.15e\n", S);
printf("Sprime = %1.15e\n", Sprime);
printf("dS = %1.15e\n", Sprime - S);
printf("real(dS_predict) = %1.15e\n", dSpred.real());
printf("imag(dS_predict) = %1.15e\n\n", dSpred.imag());
assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ;
std::cout << GridLogMessage << "Done" << std::endl;
Grid_finalize();
}

View File

@ -42,7 +42,7 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -42,7 +42,7 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

View File

@ -71,7 +71,7 @@ int main(int argc, char **argv) {
std::vector<int> simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
GridRedBlackCartesian RBGrid(latt_size, simd_layout, mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1, 2, 3, 4, 5});
GridSerialRNG sRNG;

136
tests/solver/Params.h Normal file
View File

@ -0,0 +1,136 @@
/*
Params IO
Author: Christoph Lehner
Date: 2017
*/
#define PADD(p,X) p.get(#X,X);
class Params {
protected:
std::string trim(const std::string& sc) {
std::string s = sc;
s.erase(s.begin(), std::find_if(s.begin(), s.end(),
[](unsigned char c) { return !std::isspace(c); }));
s.erase(std::find_if(s.rbegin(), s.rend(),
[](unsigned char c) { return !std::isspace(c); }).base(), s.end());
return s;
}
public:
std::map< std::string, std::string > lines;
std::string _fn;
Params(const char* fn) : _fn(fn) {
FILE* f = fopen(fn,"rt");
assert(f);
while (!feof(f)) {
char buf[4096];
if (fgets(buf,sizeof(buf),f)) {
if (buf[0] != '#' && buf[0] != '\r' && buf[0] != '\n') {
char* sep = strchr(buf,'=');
assert(sep);
*sep = '\0';
lines[trim(buf)] = trim(sep+1);
}
}
}
fclose(f);
}
~Params() {
}
std::string loghead() {
return _fn + ": ";
}
bool has(const char* name) {
auto f = lines.find(name);
return (f != lines.end());
}
const std::string& get(const char* name) {
auto f = lines.find(name);
if (f == lines.end()) {
std::cout << Grid::GridLogMessage << loghead() << "Could not find value for " << name << std::endl;
abort();
}
return f->second;
}
void parse(std::string& s, const std::string& cval) {
std::stringstream trimmer;
trimmer << cval;
s.clear();
trimmer >> s;
}
void parse(int& i, const std::string& cval) {
assert(sscanf(cval.c_str(),"%d",&i)==1);
}
void parse(long long& i, const std::string& cval) {
assert(sscanf(cval.c_str(),"%lld",&i)==1);
}
void parse(double& f, const std::string& cval) {
assert(sscanf(cval.c_str(),"%lf",&f)==1);
}
void parse(float& f, const std::string& cval) {
assert(sscanf(cval.c_str(),"%f",&f)==1);
}
void parse(bool& b, const std::string& cval) {
std::string lcval = cval;
std::transform(lcval.begin(), lcval.end(), lcval.begin(), ::tolower);
if (lcval == "true" || lcval == "yes") {
b = true;
} else if (lcval == "false" || lcval == "no") {
b = false;
} else {
std::cout << "Invalid value for boolean: " << b << std::endl;
assert(0);
}
}
void parse(std::complex<double>& f, const std::string& cval) {
double r,i;
assert(sscanf(cval.c_str(),"%lf %lf",&r,&i)==2);
f = std::complex<double>(r,i);
}
void parse(std::complex<float>& f, const std::string& cval) {
float r,i;
assert(sscanf(cval.c_str(),"%f %f",&r,&i)==2);
f = std::complex<float>(r,i);
}
template<class T>
void get(const char* name, std::vector<T>& v) {
int i = 0;
v.resize(0);
while (true) {
char buf[4096];
sprintf(buf,"%s[%d]",name,i++);
if (!has(buf))
break;
T val;
parse(val,get(buf));
std::cout << Grid::GridLogMessage << loghead() << "Set " << buf << " to " << val << std::endl;
v.push_back(val);
}
}
template<class T>
void get(const char* name, T& f) {
parse(f,get(name));
std::cout << Grid::GridLogMessage << loghead() << "Set " << name << " to " << f << std::endl;
}
};
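A minimal usage sketch of the Params reader above (the file contents and variable names are illustrative, not part of the header):

// params.txt:
//   Nstop1 = 60
//   block_size[0] = 2
//   block_size[1] = 2
Params jp("params.txt");
int Nstop1;
PADD(jp, Nstop1);                 // expands to jp.get("Nstop1", Nstop1);
std::vector<int> block_size;
jp.get("block_size", block_size); // reads block_size[0], block_size[1], ... until one is missing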

View File

@ -0,0 +1,727 @@
/*
Authors: Christoph Lehner
Date: 2017
Multigrid Lanczos
TODO:
High priority:
- Explore filtering of the starting vector again; it should really work: if the Chebyshev is ~4 in the low-mode region and ~1 for the high modes, 15 applications give
~1e9 suppression of the high modes, which should already create the desired invariant subspace? Missing something here??? Maybe the dynamic range is dangerous, i.e., it
could also kill the interesting eigenrange if one is not careful.
Better: use all Chebyshevs up to order N to approximate a step function; try this! Problem: the width of the step function. It can kill the eigenspace > 1e-3 while
setting the part < 1e-5 equal to 1.
Low priority:
- Given that I seem to need many restarts and a high-degree polynomial to create the basis, and that this takes about 1 day, seriously consider a simpler method to create
a basis (orthogonalized Krylov with a low-order poly); then fix up the lowest, say, 200 eigenvalues by one run with a high-degree poly (600 could be enough).
*/
#include <Grid/Grid.h>
#include "Params.h"
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
bool read_evals(GridBase* _grid, char* fn, std::vector<RealD>& evals) {
FILE* f = 0;
uint32_t status = 0;
if (_grid->IsBoss()) {
f = fopen(fn,"rt");
status = f ? 1 : 0;
}
_grid->GlobalSum(status);
if (!status)
return false;
uint32_t N;
if (f)
assert(fscanf(f,"%d\n",&N)==1);
else
N = 0;
_grid->GlobalSum(N);
std::cout << "Reading " << N << " eigenvalues" << std::endl;
evals.resize(N);
for (int i=0;i<N;i++) {
if (f)
assert(fscanf(f,"%lf",&evals[i])==1);
else
evals[i] = 0;
}
_grid->GlobalSumVector(&evals[0],evals.size());
if (f)
fclose(f);
return true;
}
void write_evals(char* fn, std::vector<RealD>& evals) {
FILE* f = fopen(fn,"wt");
assert(f);
int N = (int)evals.size();
fprintf(f,"%d\n",N);
for (int i=0;i<N;i++) {
fprintf(f,"%.15E\n",evals[i]);
}
fclose(f);
}
void write_history(char* fn, std::vector<RealD>& hist) {
FILE* f = fopen(fn,"wt");
assert(f);
int N = (int)hist.size();
for (int i=0;i<N;i++) {
fprintf(f,"%d %.15E\n",i,hist[i]);
}
fclose(f);
}
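// On-disk format shared by read_evals/write_evals: a leading count N on its own line,
// then one eigenvalue per line in %.15E; write_history instead writes "index value"
// pairs without a leading count.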
template<typename Field>
class FunctionHermOp : public LinearFunction<Field> {
public:
OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;
FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop) : _poly(poly), _Linop(linop) {
}
void operator()(const Field& in, Field& out) {
_poly(_Linop,in,out);
}
};
template<typename Field>
class CheckpointedLinearFunction : public LinearFunction<Field> {
public:
LinearFunction<Field>& _op;
std::string _dir;
int _max_apply;
int _apply, _apply_actual;
GridBase* _grid;
FILE* _f;
CheckpointedLinearFunction(GridBase* grid, LinearFunction<Field>& op, const char* dir,int max_apply) : _op(op), _dir(dir), _grid(grid), _f(0),
_max_apply(max_apply), _apply(0), _apply_actual(0) {
FieldVectorIO::conditionalMkDir(dir);
char fn[4096];
sprintf(fn,"%s/ckpt_op.%4.4d",_dir.c_str(),_grid->ThisRank());
printf("CheckpointLinearFunction:: file %s\n",fn);
_f = fopen(fn,"r+b");
if (!_f)
_f = fopen(fn,"w+b");
assert(_f);
fseek(_f,0,SEEK_CUR);
}
~CheckpointedLinearFunction() {
if (_f) {
fclose(_f);
_f = 0;
}
}
bool load_ckpt(const Field& in, Field& out) {
off_t cur = ftello(_f);
fseeko(_f,0,SEEK_END);
if (cur == ftello(_f))
return false;
fseeko(_f,cur,SEEK_SET);
size_t sz = sizeof(out._odata[0]) * out._odata.size();
GridStopWatch gsw;
gsw.Start();
uint32_t crc_exp;
assert(fread(&crc_exp,4,1,_f)==1);
assert(fread(&out._odata[0],sz,1,_f)==1);
assert(FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0)==crc_exp);
gsw.Stop();
printf("CheckpointLinearFunction:: reading %lld\n",(long long)sz);
std::cout << GridLogMessage << "Loading " << ((RealD)sz/1024./1024./1024.) << " GB in " << gsw.Elapsed() << std::endl;
return true;
}
void save_ckpt(const Field& in, Field& out) {
fseek(_f,0,SEEK_CUR); // switch to write
size_t sz = sizeof(out._odata[0]) * out._odata.size();
GridStopWatch gsw;
gsw.Start();
uint32_t crc = FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0);
assert(fwrite(&crc,4,1,_f)==1);
assert(fwrite(&out._odata[0],sz,1,_f)==1);
fflush(_f); // try this on the GPFS to suppress OPA usage for disk during dslash; this is not needed at Lustre/JLAB
gsw.Stop();
printf("CheckpointLinearFunction:: writing %lld\n",(long long)sz);
std::cout << GridLogMessage << "Saving " << ((RealD)sz/1024./1024./1024.) << " GB in " << gsw.Elapsed() << std::endl;
}
void operator()(const Field& in, Field& out) {
_apply++;
if (load_ckpt(in,out))
return;
_op(in,out);
save_ckpt(in,out);
if (_apply_actual++ >= _max_apply) {
std::cout << GridLogMessage << "Maximum application of operator reached, checkpoint and finish in future job" << std::endl;
if (_f) { fclose(_f); _f=0; }
in._grid->Barrier();
Grid_finalize();
exit(3);
}
}
};
template<typename CoarseField,typename Field>
class ProjectedFunctionHermOp : public LinearFunction<CoarseField> {
public:
OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;
BlockProjector<Field>& _pr;
ProjectedFunctionHermOp(BlockProjector<Field>& pr,OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop) : _poly(poly), _Linop(linop), _pr(pr) {
}
void operator()(const CoarseField& in, CoarseField& out) {
assert(_pr._bgrid._o_blocks == in._grid->oSites());
Field fin(_pr._bgrid._grid);
Field fout(_pr._bgrid._grid);
GridStopWatch gsw1,gsw2,gsw3;
// fill fin
gsw1.Start();
_pr.coarseToFine(in,fin);
gsw1.Stop();
// apply poly
gsw2.Start();
_poly(_Linop,fin,fout);
gsw2.Stop();
// fill out
gsw3.Start();
_pr.fineToCoarse(fout,out);
gsw3.Stop();
auto eps = innerProduct(in,out);
std::cout << GridLogMessage << "Operator timing details: c2f = " << gsw1.Elapsed() << " poly = " << gsw2.Elapsed() << " f2c = " << gsw3.Elapsed() <<
" Complimentary Hermiticity check: " << eps.imag() / std::abs(eps) << std::endl;
}
};
template<typename CoarseField,typename Field>
class ProjectedHermOp : public LinearFunction<CoarseField> {
public:
LinearOperatorBase<Field> &_Linop;
BlockProjector<Field>& _pr;
ProjectedHermOp(BlockProjector<Field>& pr,LinearOperatorBase<Field>& linop) : _Linop(linop), _pr(pr) {
}
void operator()(const CoarseField& in, CoarseField& out) {
assert(_pr._bgrid._o_blocks == in._grid->oSites());
Field fin(_pr._bgrid._grid);
Field fout(_pr._bgrid._grid);
_pr.coarseToFine(in,fin);
_Linop.HermOp(fin,fout);
_pr.fineToCoarse(fout,out);
}
};
template<typename Field>
class PlainHermOp : public LinearFunction<Field> {
public:
LinearOperatorBase<Field> &_Linop;
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop) {
}
void operator()(const Field& in, Field& out) {
_Linop.HermOp(in,out);
}
};
template<typename vtype, int N > using CoarseSiteFieldGeneral = iScalar< iVector<vtype, N> >;
template<int N> using CoarseSiteFieldD = CoarseSiteFieldGeneral< vComplexD, N >;
template<int N> using CoarseSiteFieldF = CoarseSiteFieldGeneral< vComplexF, N >;
template<int N> using CoarseSiteField = CoarseSiteFieldGeneral< vComplex, N >;
template<int N> using CoarseLatticeFermion = Lattice< CoarseSiteField<N> >;
template<int N> using CoarseLatticeFermionD = Lattice< CoarseSiteFieldD<N> >;
template<typename Field,int Nstop1>
void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npoly2,
int Nstop2,int Nk2,int Nm2,RealD resid2,RealD betastp2,int MaxIt,int MinRes2,
LinearOperatorBase<Field>& HermOp, std::vector<RealD>& eval1, bool cg_test_enabled,
int cg_test_maxiter,int nsingle,int SkipTest2, int MaxApply2,bool smoothed_eval_enabled,
int smoothed_eval_inner,int smoothed_eval_outer,int smoothed_eval_begin,
int smoothed_eval_end,RealD smoothed_eval_inner_resid) {
BlockedGrid<Field>& bgrid = pr._bgrid;
BasisFieldVector<Field>& basis = pr._evec;
std::vector<int> coarseFourDimLatt;
for (int i=0;i<4;i++)
coarseFourDimLatt.push_back(bgrid._nb[1+i] * bgrid._grid->_processors[1+i]);
assert(bgrid._grid->_processors[0] == 1);
std::cout << GridLogMessage << "CoarseGrid = " << coarseFourDimLatt << " with basis = " << Nstop1 << std::endl;
GridCartesian * UCoarseGrid = SpaceTimeGrid::makeFourDimGrid(coarseFourDimLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * FCoarseGrid = SpaceTimeGrid::makeFiveDimGrid(bgrid._nb[0],UCoarseGrid);
Chebyshev<Field> Cheb2(alpha2,beta,Npoly2);
CoarseLatticeFermion<Nstop1> src_coarse(FCoarseGrid);
// Second round of Lanczos in blocked space
std::vector<RealD> eval2(Nm2);
std::vector<RealD> eval3(Nm2);
BasisFieldVector<CoarseLatticeFermion<Nstop1> > coef(Nm2,FCoarseGrid);
ProjectedFunctionHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2plain(pr,Cheb2,HermOp);
CheckpointedLinearFunction<CoarseLatticeFermion<Nstop1> > Op2ckpt(src_coarse._grid,Op2plain,"checkpoint",MaxApply2);
LinearFunction< CoarseLatticeFermion<Nstop1> >* Op2;
if (MaxApply2) {
Op2 = &Op2ckpt;
} else {
Op2 = &Op2plain;
}
ProjectedHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2nopoly(pr,HermOp);
BlockImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2);
src_coarse = 1.0;
// Precision test
{
Field tmp(bgrid._grid);
CoarseLatticeFermion<Nstop1> tmp2(FCoarseGrid);
CoarseLatticeFermion<Nstop1> tmp3(FCoarseGrid);
tmp2 = 1.0;
tmp3 = 1.0;
pr.coarseToFine(tmp2,tmp);
pr.fineToCoarse(tmp,tmp2);
tmp2 -= tmp3;
std::cout << GridLogMessage << "Precision Test c->f->c: " << norm2(tmp2) / norm2(tmp3) << std::endl;
//bgrid._grid->Barrier();
//return;
}
int Nconv;
if (!FieldVectorIO::read_compressed_vectors("lanczos.output",pr,coef) ||
!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt",eval3) ||
!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.linear",eval1) ||
!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.poly",eval2)
) {
IRL2.calc(eval2,coef,src_coarse,Nconv,true,SkipTest2);
coef.resize(Nstop2);
eval2.resize(Nstop2);
eval3.resize(Nstop2);
std::vector<Field> step3_cache;
// reconstruct eigenvalues of original operator
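// The coarse Lanczos returned eigenvalues of Cheb2(HermOp); approxInv below solves
// Cheb2(x) = eval2_poly for x, starting from the linear-operator guess eval2_linear,
// to recover the corresponding eigenvalue of the original operator.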
for (int i=0;i<Nstop2;i++){
RealD eval2_linear;
if (i<Nstop1) {
eval2_linear = eval1[i];
} else {
eval2_linear = eval2[i-1];
}
RealD eval2_poly = eval2[i];
RealD eval_reconstruct = Cheb2.approxInv(eval2_poly,eval2_linear,100,1e-10);
std::cout << i << " Reconstructed eval = " << eval_reconstruct << " from quess " << eval2_linear << std::endl;
eval2[i] = eval_reconstruct;
}
// as demonstrated in CG test below, best result from mixed determination
for (int i=0;i<Nstop2;i++)
eval3[i] = (i < Nstop1) ? eval1[i] : eval2[i];
for(int i=0;i<Nstop2;i++){
std::cout << i<<" / "<< Nstop2<< " eigenvalue "<< eval3[i] <<std::endl;
};
// write
mkdir("lanczos.output",ACCESSPERMS);
FieldVectorIO::write_compressed_vectors("lanczos.output",pr,coef,nsingle);
if (bgrid._grid->IsBoss()) {
write_evals((char *)"lanczos.output/eigen-values.txt",eval3);
write_evals((char *)"lanczos.output/eigen-values.txt.linear",eval1);
write_evals((char *)"lanczos.output/eigen-values.txt.poly",eval2);
}
}
// fix up eigenvalues
if (!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.smoothed",eval3) && smoothed_eval_enabled) {
ConjugateGradient<LatticeFermion> CG(smoothed_eval_inner_resid, smoothed_eval_inner, false);
LatticeFermion v_i(basis[0]._grid);
auto tmp = v_i;
auto tmp2 = v_i;
for (int i=smoothed_eval_begin;i<smoothed_eval_end;i++) {
GridStopWatch gsw;
gsw.Start();
pr.coarseToFine(coef[i],v_i);
v_i.checkerboard = Odd;
for (int j=0;j<smoothed_eval_outer;j++) {
tmp=zero;
//pr.deflate(coef,eval3,Nstop2,v_i,tmp);
CG(HermOp, v_i, tmp);
v_i = 1.0 / ::sqrt( norm2(tmp) ) * tmp;
}
tmp = v_i;
HermOp.HermOp(tmp,tmp2);
RealD ev = innerProduct(tmp,tmp2).real();
gsw.Stop();
std::cout << GridLogMessage << "Smoothed eigenvalue " << i << " from " << eval3[i] << " to " << ev << " in " << gsw.Elapsed() << std::endl;
// " with effective smoother precision " << (CG.ResHistory.back() / CG.ResHistory.front() ) << std::endl;
// CG.ResHistory.clear();
eval3[i] = ev;
}
if (bgrid._grid->IsBoss()) {
write_evals((char *)"lanczos.output/eigen-values.txt.smoothed",eval3);
write_evals((char *)"lanczos.output/eigen-values.txt",eval3); // also reset this to the best ones we have available
}
}
// do CG test with and without deflation
if (cg_test_enabled) {
ConjugateGradient<LatticeFermion> CG(1.0e-8, cg_test_maxiter, false);
LatticeFermion src_orig(bgrid._grid);
src_orig.checkerboard = Odd;
src_orig = 1.0;
src_orig = src_orig * (1.0 / ::sqrt(norm2(src_orig)) );
auto result = src_orig;
// undeflated solve
result = zero;
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.undefl",CG.ResHistory);
// CG.ResHistory.clear();
// deflated solve with all eigenvectors
result = zero;
pr.deflate(coef,eval2,Nstop2,src_orig,result);
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.defl_all",CG.ResHistory);
// CG.ResHistory.clear();
// deflated solve with non-blocked eigenvectors
result = zero;
pr.deflate(coef,eval1,Nstop1,src_orig,result);
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.defl_full",CG.ResHistory);
// CG.ResHistory.clear();
// deflated solve with all eigenvectors and original eigenvalues from proj
result = zero;
pr.deflate(coef,eval3,Nstop2,src_orig,result);
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.defl_all_ev3",CG.ResHistory);
// CG.ResHistory.clear();
}
}
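// Cheap alternative to a full first-stage Lanczos: generate a basis of the Krylov
// space of Op on src, orthogonalizing each new vector against those already stored
// and normalizing it before it is appended.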
template<typename Field>
void quick_krylov_basis(BasisFieldVector<Field>& evec,Field& src,LinearFunction<Field>& Op,int Nstop) {
Field tmp = src;
Field tmp2 = tmp;
for (int i=0;i<Nstop;i++) {
GridStopWatch gsw;
gsw.Start();
Op(tmp,tmp2);
gsw.Stop();
evec.orthogonalize(tmp2,i);
RealD nn = norm2(tmp2);
nn = Grid::sqrt(nn);
tmp2 = tmp2 * (1.0/nn);
evec[i] = tmp2;
tmp = tmp2;
std::cout << GridLogMessage << "Quick_krylov_basis: " << i << "/" << Nstop << " timing of operator=" << gsw.Elapsed() << std::endl;
}
}
int main (int argc, char ** argv) {
Grid_init(&argc,&argv);
const int MaxIt = 10000;
int Ls;
RealD mass;
RealD M5;
std::vector < std::complex<double> > omega;
RealD alpha1, alpha2, beta;
int Npoly1, Npoly2;
int Nstop1, Nstop2;
int Nk1, Nk2;
int Np1, Np2;
int MinRes1, MinRes2;
int SkipTest2, MaxApply2;
bool checkpoint_basis;
bool cg_test_enabled;
bool exit_after_basis_calculation;
bool simple_krylov_basis;
int cg_test_maxiter;
int nsingle; // store in single precision, the rest in FP16
int max_cheb_time_ms;
bool smoothed_eval_enabled;
int smoothed_eval_inner;
int smoothed_eval_outer;
int smoothed_eval_begin;
int smoothed_eval_end;
RealD smoothed_eval_inner_resid;
// vector representation
std::vector<int> block_size; // 5d block size
RealD resid1, resid2, betastp1, betastp2, basis_norm_threshold;
std::string config;
Params jp("params.txt");
PADD(jp,Npoly1); PADD(jp,Npoly2);
PADD(jp,max_cheb_time_ms);
PADD(jp,Nstop1); PADD(jp,Nstop2); PADD(jp,MaxApply2);
PADD(jp,Nk1); PADD(jp,Nk2); PADD(jp,betastp1); PADD(jp,betastp2);
PADD(jp,Np1); PADD(jp,Np2); basis_norm_threshold = 1e-5; //PADD(jp,basis_norm_threshold);
PADD(jp,block_size); PADD(jp,smoothed_eval_enabled); PADD(jp,smoothed_eval_inner);
PADD(jp,resid1); PADD(jp,resid2); PADD(jp,smoothed_eval_outer);
PADD(jp,alpha1); PADD(jp,alpha2); PADD(jp,smoothed_eval_begin);
PADD(jp,MinRes1); PADD(jp,MinRes2); PADD(jp,smoothed_eval_end);
PADD(jp,beta); PADD(jp,mass); PADD(jp,smoothed_eval_inner_resid);
PADD(jp,omega); PADD(jp,config);
PADD(jp,M5); PADD(jp,cg_test_enabled);
PADD(jp,cg_test_maxiter); PADD(jp,checkpoint_basis);
PADD(jp,nsingle); PADD(jp,exit_after_basis_calculation);
PADD(jp,simple_krylov_basis); PADD(jp,SkipTest2);
Ls = (int)omega.size();
// Grids
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * UGridHP = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * UrbGridHP = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridHP);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridCartesian * FGridHP = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridHP);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGridHP = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridHP);
// Gauge field
LatticeGaugeField Umu(UGrid);
FieldMetaData header;
NerscIO::readConfiguration(Umu,header,config);
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
<< " Ls: " << Ls << std::endl;
// ZMobius EO Operator
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omega,1.,0.);
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);
// Eigenvector storage
const int Nm1 = Np1 + Nk1;
const int Nm2 = Np2 + Nk2; // maximum number of vectors we need to keep
std::cout << GridLogMessage << "Keep " << Nm1 << " full vectors" << std::endl;
std::cout << GridLogMessage << "Keep " << Nm2 << " total vectors" << std::endl;
assert(Nm2 >= Nm1);
BasisFieldVector<LatticeFermion> evec(Nm1,FrbGrid); // start off with keeping full vectors
// First and second cheby
Chebyshev<LatticeFermion> Cheb1(alpha1,beta,Npoly1);
FunctionHermOp<LatticeFermion> Op1(Cheb1,HermOp);
PlainHermOp<LatticeFermion> Op1test(HermOp);
// Eigenvalue storage
std::vector<RealD> eval1(evec.size());
// Construct source vector
LatticeFermion src(FrbGrid);
{
src=1.0;
src.checkerboard = Odd;
// normalize
RealD nn = norm2(src);
nn = Grid::sqrt(nn);
src = src * (1.0/nn);
}
// Do a benchmark and exit quickly if performance is too low (ugly, but needed due to performance fluctuations)
if (max_cheb_time_ms) {
// one round of warmup
auto tmp = src;
GridStopWatch gsw1,gsw2;
gsw1.Start();
Cheb1(HermOp,src,tmp);
gsw1.Stop();
Ddwf.ZeroCounters();
gsw2.Start();
Cheb1(HermOp,src,tmp);
gsw2.Stop();
Ddwf.Report();
std::cout << GridLogMessage << "Performance check; warmup = " << gsw1.Elapsed() << " test = " << gsw2.Elapsed() << std::endl;
int ms = (int)(gsw2.useconds()/1e3);
if (ms > max_cheb_time_ms) {
std::cout << GridLogMessage << "Performance too poor: " << ms << " ms, cutoff = " << max_cheb_time_ms << " ms" << std::endl;
Grid_finalize();
return 2;
}
}
// First round of Lanczos to get low mode basis
BlockImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1);
int Nconv;
char tag[1024];
if (!FieldVectorIO::read_argonne(evec,(char *)"checkpoint") || !read_evals(UGrid,(char *)"checkpoint/eigen-values.txt",eval1)) {
if (simple_krylov_basis) {
quick_krylov_basis(evec,src,Op1,Nstop1);
} else {
IRL1.calc(eval1,evec,src,Nconv,false,1);
}
evec.resize(Nstop1); // and throw away superfluous
eval1.resize(Nstop1);
if (checkpoint_basis)
FieldVectorIO::write_argonne(evec,(char *)"checkpoint");
if (UGrid->IsBoss() && checkpoint_basis)
write_evals((char *)"checkpoint/eigen-values.txt",eval1);
Ddwf.Report();
if (exit_after_basis_calculation) {
Grid_finalize();
return 0;
}
}
// now test eigenvectors
if (!simple_krylov_basis) {
for (int i=0;i<Nstop1;i++){
auto B = evec[i];
auto tmp = B;
auto v = B;
{
HermOp.HermOp(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
RealD eval2 = vnum/vden;
v -= eval2*B;
RealD vv = norm2(v);
std::cout << i << " OP eval = " << eval2 << " (" << eval1[i] << ") "
<< "res2 = " << vv << " norm2 = " << norm2(B) << std::endl;
}
}
}
// do second step only if needed
if (Nstop1 <= Nstop2) {
// Now setup blocking
assert(evec.size() == Nstop1);
BlockedGrid<LatticeFermion> bgrid(FrbGrid, block_size);
BlockProjector<LatticeFermion> pr(evec,bgrid);
pr.createOrthonormalBasis(basis_norm_threshold);
pr.createOrthonormalBasis(basis_norm_threshold); // another round due to precision issues created by local coherence
constexpr int common_basis_sizes[] = { 60, 250, 400 };
constexpr int n_common_basis_sizes = sizeof(common_basis_sizes) / sizeof(common_basis_sizes[0]);
switch (Nstop1) {
#define BASIS(n) case common_basis_sizes[n]:\
CoarseGridLanczos<LatticeFermion,common_basis_sizes[n]>\
(pr,alpha2,beta,Npoly2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2,HermOp,eval1, \
cg_test_enabled,cg_test_maxiter,nsingle,SkipTest2, \
MaxApply2,smoothed_eval_enabled,smoothed_eval_inner,smoothed_eval_outer, \
smoothed_eval_begin,smoothed_eval_end,smoothed_eval_inner_resid); break;
BASIS(0);
BASIS(1);
BASIS(2);
default:
std::cout << GridLogMessage << "Basis size " << Nstop1 << " must be added at compile-time" << std::endl;
std::cout << GridLogMessage << "Currently available sizes: " << std::endl;
for (int i=0;i<n_common_basis_sizes;i++) {
std::cout << GridLogMessage << " " << common_basis_sizes[i] << std::endl;
}
}
}
Grid_finalize();
}

View File

@ -0,0 +1,229 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params;
const int Ls=4;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
int nrhs = UGrid->RankCount() ;
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid);
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds);
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
for(int s=0;s<nrhs;s++) random(pRNG5,src[s]);
for(int s=0;s<nrhs;s++) result[s]=zero;
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
///////////////////////////////////////////////////////////////
// Bounce these fields to disk
///////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Writing out in parallel view "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
emptyUserRecord record;
std::string file("./scratch.scidac");
std::string filef("./scratch.scidac.ferm");
int me = UGrid->ThisRank();
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_src_split(SFGrid);
FermionField s_tmp(SFGrid);
FermionField s_res(SFGrid);
{
FGrid->Barrier();
ScidacWriter _ScidacWriter;
_ScidacWriter.open(file);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Writing out gauge field "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
_ScidacWriter.writeScidacFieldRecord(Umu,record);
_ScidacWriter.close();
FGrid->Barrier();
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Reading in gauge field "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
ScidacReader _ScidacReader;
_ScidacReader.open(file);
_ScidacReader.readScidacFieldRecord(s_Umu,record);
_ScidacReader.close();
FGrid->Barrier();
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Read in gauge field "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
}
{
for(int n=0;n<nrhs;n++){
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Writing out record "<<n<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::stringstream filefn; filefn << filef << "."<< n;
ScidacWriter _ScidacWriter;
_ScidacWriter.open(filefn.str());
_ScidacWriter.writeScidacFieldRecord(src[n],record);
_ScidacWriter.close();
}
FGrid->Barrier();
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Reading back in the single process view "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
for(int n=0;n<nrhs;n++){
if ( n==me ) {
std::stringstream filefn; filefn << filef << "."<< n;
ScidacReader _ScidacReader;
_ScidacReader.open(filefn.str());
_ScidacReader.readScidacFieldRecord(s_src,record);
_ScidacReader.close();
}
}
FGrid->Barrier();
}
///////////////////////////////////////////////////////////////
// split the source out using MPI instead of I/O
///////////////////////////////////////////////////////////////
std::cout << GridLogMessage << " Splitting the grid data "<<std::endl;
Grid_split (src,s_src_split);
std::cout << GridLogMessage << " Finished splitting the grid data "<<std::endl;
for(int n=0;n<nrhs;n++){
std::cout <<GridLogMessage<<"Full "<< n <<" "<< norm2(src[n])<<std::endl;
}
s_tmp = s_src_split - s_src;
for(int n=0;n<nrhs;n++){
FGrid->Barrier();
if ( n==me ) {
std::cerr << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<<std::endl;
}
FGrid->Barrier();
}
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
RealD mass=0.01;
RealD M5=1.8;
DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5);
DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
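// Each rank targets a different residual (1.0e-8/(me+1)), so the iteration counts
// gathered below differ per rank and demonstrate genuinely independent solves.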
ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
s_res = zero;
CG(HermOp,s_src,s_res);
/////////////////////////////////////////////////////////////
// Report how long they all took
/////////////////////////////////////////////////////////////
std::vector<uint32_t> iterations(nrhs,0);
iterations[me] = CG.IterationsToComplete;
for(int n=0;n<nrhs;n++){
UGrid->GlobalSum(iterations[n]);
std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl;
}
/////////////////////////////////////////////////////////////
// Gather and residual check on the results
/////////////////////////////////////////////////////////////
std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl;
Grid_unsplit(result,s_res);
/*
Grid_unsplit(src_chk,s_src);
for(int n=0;n<nrhs;n++){
tmp = src[n]-src_chk[n];
std::cout << " src_chk "<<n<<" "<<norm2(src_chk[n])<<" " <<norm2(src[n])<<" " <<norm2(tmp)<< std::endl;
std::cout << " diff " <<tmp<<std::endl;
}
*/
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)<<std::endl;
}
Grid_finalize();
}

View File

@ -0,0 +1,144 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params;
const int Ls=4;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
int nrhs = UGrid->RankCount() ;
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid);
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds);
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
for(int s=0;s<nrhs;s++) random(pRNG5,src[s]);
for(int s=0;s<nrhs;s++) result[s]=zero;
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
/////////////////
// MPI only sends
/////////////////
int me = UGrid->ThisRank();
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_tmp(SFGrid);
FermionField s_res(SFGrid);
///////////////////////////////////////////////////////////////
// split the source out using MPI instead of I/O
///////////////////////////////////////////////////////////////
Grid_split (Umu,s_Umu);
Grid_split (src,s_src);
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
RealD mass=0.01;
RealD M5=1.8;
DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5);
DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
s_res = zero;
CG(HermOp,s_src,s_res);
/////////////////////////////////////////////////////////////
// Report how long they all took
/////////////////////////////////////////////////////////////
std::vector<uint32_t> iterations(nrhs,0);
iterations[me] = CG.IterationsToComplete;
for(int n=0;n<nrhs;n++){
UGrid->GlobalSum(iterations[n]);
std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl;
}
/////////////////////////////////////////////////////////////
// Gather and residual check on the results
/////////////////////////////////////////////////////////////
std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl;
Grid_unsplit(result,s_res);
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)<<std::endl;
}
Grid_finalize();
}

View File

@ -0,0 +1,163 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params;
const int Ls=4;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
int nrhs = UGrid->RankCount() ;
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid);
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds);
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
std::vector<FermionField> src_e(nrhs,FrbGrid);
std::vector<FermionField> src_o(nrhs,FrbGrid);
for(int s=0;s<nrhs;s++) random(pRNG5,src[s]);
for(int s=0;s<nrhs;s++) result[s]=zero;
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
/////////////////
// MPI only sends
/////////////////
int me = UGrid->ThisRank();
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_src_e(SFrbGrid);
FermionField s_src_o(SFrbGrid);
FermionField s_tmp(SFGrid);
FermionField s_res(SFGrid);
///////////////////////////////////////////////////////////////
// split the source out using MPI instead of I/O
///////////////////////////////////////////////////////////////
Grid_split (Umu,s_Umu);
Grid_split (src,s_src);
///////////////////////////////////////////////////////////////
// Check even odd cases
///////////////////////////////////////////////////////////////
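// pickCheckerboard extracts the even/odd half of a full field and setCheckerboard
// writes a half back in; splitting the halves separately and reassembling them must
// reproduce the split full field, so the difference printed below should vanish.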
for(int s=0;s<nrhs;s++){
pickCheckerboard(Odd , src_o[s], src[s]);
pickCheckerboard(Even, src_e[s], src[s]);
}
Grid_split (src_e,s_src_e);
Grid_split (src_o,s_src_o);
setCheckerboard(s_tmp, s_src_o);
setCheckerboard(s_tmp, s_src_e);
s_tmp = s_tmp - s_src;
std::cout << GridLogMessage<<" EvenOdd Difference " <<norm2(s_tmp)<<std::endl;
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
RealD mass=0.01;
RealD M5=1.8;
DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5);
DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
s_res = zero;
CG(HermOp,s_src,s_res);
/////////////////////////////////////////////////////////////
// Report how long they all took
/////////////////////////////////////////////////////////////
std::vector<uint32_t> iterations(nrhs,0);
iterations[me] = CG.IterationsToComplete;
for(int n=0;n<nrhs;n++){
UGrid->GlobalSum(iterations[n]);
std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl;
}
/////////////////////////////////////////////////////////////
// Gather and residual check on the results
/////////////////////////////////////////////////////////////
std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl;
Grid_unsplit(result,s_res);
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)<<std::endl;
}
Grid_finalize();
}

View File

@ -40,7 +40,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4,5});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);

View File

@ -0,0 +1,130 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_wilson_cg_unprec.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
typename ImprovedStaggeredFermion5DR::ImplParams params;
const int Ls=8;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds);
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
FermionField src(FGrid); random(pRNG5,src);
FermionField src_o(FrbGrid); pickCheckerboard(Odd,src_o,src);
FermionField result_o(FrbGrid); result_o=zero;
RealD nrm = norm2(src);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
RealD mass=0.003;
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass);
SchurStaggeredOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds);
ConjugateGradient<FermionField> CG(1.0e-8,10000);
int blockDim = 0; // block/multi-RHS direction: the fifth (s) dimension
BlockConjugateGradient<FermionField> BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000); // block CG with thin-QR reorthonormalisation
BlockConjugateGradient<FermionField> BCG (BlockCG,blockDim,1.0e-8,10000); // plain block CG (constructed here, not exercised below)
BlockConjugateGradient<FermionField> mCG (CGmultiRHS,blockDim,1.0e-8,10000); // independent per-RHS CG recurrences, fused operator applications
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling 4d CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass);
SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d);
FermionField src4d(UGrid); random(pRNG,src4d);
FermionField src4d_o(UrbGrid); pickCheckerboard(Odd,src4d_o,src4d);
FermionField result4d_o(UrbGrid);
result4d_o=zero;
CG(HermOp4d,src4d_o,result4d_o);
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
std::cout << GridLogMessage << " Calling 5d CG for "<<Ls <<" right hand sides" <<std::endl;
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
Ds.ZeroCounters();
result_o=zero;
CG(HermOp,src_o,result_o);
Ds.Report();
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
std::cout << GridLogMessage << " Calling multiRHS CG for "<<Ls <<" right hand sides" <<std::endl;
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
Ds.ZeroCounters();
result_o=zero;
mCG(HermOp,src_o,result_o);
Ds.Report();
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl;
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
Ds.ZeroCounters();
result_o=zero;
BCGrQ(HermOp,src_o,result_o);
Ds.Report();
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
Grid_finalize();
}
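Since the fifth dimension here only packs Ls independent right-hand sides, each 4D slice of the 5D solution can be inspected on its own. A hedged sketch, not part of the commit, reusing FGrid, UGrid, Ls and result_o from the test above, and assuming Grid's setCheckerboard/ExtractSlice helpers (signature ExtractSlice(lowDim,highDim,slice,orthog), with the s-direction as dimension 0):

FermionField result5(FGrid); result5 = zero;   // full 5D field, even sites left zero
setCheckerboard(result5,result_o);             // scatter the odd-site solution back
FermionField sol4(UGrid);
for(int s=0;s<Ls;s++){
  ExtractSlice(sol4,result5,s,0);              // 4D solution for right-hand side s
  std::cout << GridLogMessage << " |x["<<s<<"]|^2 = " << norm2(sol4) << std::endl;
}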

View File

@ -27,7 +27,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;

View File

@ -57,7 +57,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
@ -71,7 +71,7 @@ int main (int argc, char ** argv)
volume=volume*latt_size[mu];
}
RealD mass=0.1;
RealD mass=0.003;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
FermionField res_o(&RBGrid);
@ -79,9 +79,14 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd,src_o,src);
res_o=zero;
SchurDiagMooeeOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
SchurStaggeredOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
ConjugateGradient<FermionField> CG(1.0e-8,10000);
CG(HermOpEO,src_o,res_o);
FermionField tmp(&RBGrid);
HermOpEO.Mpc(res_o,tmp);
std::cout << "check Mpc resid " << axpy_norm(tmp,-1.0,src_o,tmp)/norm2(src_o) << "\n";
Grid_finalize();
}
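A note on the added check: axpy_norm(tmp,-1.0,src_o,tmp) overwrites tmp with tmp - src_o and returns its norm2 in a single pass, so the printed number is the relative residual of the preconditioned system. For staggered fermions the even-odd operator Mpc = m^2 - D_oe D_eo is Hermitian positive definite, so plain CG applies directly — which, as I read the change, is why SchurDiagMooeeOperator is swapped for SchurStaggeredOperator. A long-hand equivalent of the one-liner, reusing HermOpEO, res_o, src_o and RBGrid:

FermionField diff(&RBGrid);
HermOpEO.Mpc(res_o,diff);          // diff = Mpc * x
diff = diff - src_o;               // diff = Mpc*x - b
std::cout << "check Mpc resid " << norm2(diff)/norm2(src_o) << "\n";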

View File

@ -57,7 +57,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);

View File

@ -52,7 +52,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);

View File

@ -52,7 +52,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);

View File

@ -52,7 +52,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);

View File

@ -52,7 +52,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);