mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Compare commits

...

26 Commits

SHA1 Message Date
3064c9a6e2 Improve the matching of stencil coarsening 2020-09-08 15:36:58 -04:00
729882827c Improve the coarse matrix calc 2020-09-08 15:36:33 -04:00
baa668d3ac Merge branch 'develop' into feature/hw-multigrid
Conflicts:
	Grid/allocator/MemoryManager.h
2020-09-03 22:16:50 -04:00
3c82d16ed8 4D multigrid 2020-09-03 22:11:17 -04:00
5c8c0c2d7c BiCG 2020-09-03 22:10:40 -04:00
e5a100846c Useful tthing to preserve 2020-09-03 22:09:57 -04:00
a74e2dc12e Printing mem info 2020-09-03 22:08:44 -04:00
595f512a6e G5 for coarse space too 2020-09-03 22:07:40 -04:00
a6499b22ff Stats printing 2020-09-03 22:00:46 -04:00
b4e42a59c6 Stats traacking improvement 2020-09-03 22:00:14 -04:00
8c913e0edd Clearer UVM ttreatment 2020-09-03 21:59:05 -04:00
fd3f93d8d3 Zero changes 2020-09-03 21:57:11 -04:00
e9543cdacd Time deflation 2020-09-03 21:56:02 -04:00
98f7b3d298 Pcg 2020-09-03 21:55:05 -04:00
b7b164ea24 Test operator and deebug code 2020-09-03 21:54:20 -04:00
77124d99d5 Merge branch 'develop' into feature/hw-multigrid 2020-09-03 21:52:04 -04:00
e1327e7ea0 Optional bounds check debug code 2020-07-16 16:57:46 -04:00
569f78c2cf Stenccil improvement 2020-07-16 16:57:13 -04:00
488c79d5a1 Bound improvement 2020-07-15 19:58:08 -04:00
dc6b0f20b2 Fixed array bounds 2020-07-02 12:20:20 -04:00
c0badc3e16 Summit bounce back to git 2020-07-02 10:48:39 -04:00
58f6529b55 Slowly piecing together 2020-06-30 16:42:03 -04:00
e3f056dfbb Hw multigrid operator 2020-06-30 16:10:16 -04:00
da0ffa7a79 Two spin update defer commit to repository 2020-06-30 16:09:48 -04:00
fcc7640b9c Detect a coarsened vector 2020-06-30 14:17:45 -04:00
0cbe2859e0 Making progress on Hw based 5d coarse matrix 2020-06-30 14:17:20 -04:00
27 changed files with 1319 additions and 504 deletions

View File

@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
Lattice<dotp> fine_inner_msk(fine);
// Multiply could be fused with innerProduct
// Single block sum kernel could do both masks.
fine_inner = localInnerProduct(fineX,fineY);
mult(fine_inner_msk, fine_inner,FineMask);
blockSum(CoarseInner,fine_inner_msk);
}
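For reference, the operation above can be stated in scalar form: each coarse cell accumulates the mask-weighted local inner product of the two fine fields over the sites it covers. The sketch below is a plain-C++ illustration of that semantics on a 1-D lattice, not the Grid kernel; the lattice size, block size and field contents are assumptions for illustration only.

// Illustrative only: scalar analogue of a masked block inner product on a 1-D lattice.
#include <complex>
#include <cstdio>
#include <vector>

int main() {
  const int L = 8, block = 4;                    // assumed fine size and block size
  std::vector<std::complex<double>> X(L), Y(L);  // two "fine" fields
  std::vector<double> mask(L);                   // 0/1 site mask (e.g. even sites only)
  for (int x = 0; x < L; x++) {
    X[x] = {1.0 + x, 0.5};
    Y[x] = {0.25 * x, -1.0};
    mask[x] = (x % 2 == 0) ? 1.0 : 0.0;
  }
  // CoarseInner[b] = sum_{x in block b} mask(x) * conj(X(x)) * Y(x)
  std::vector<std::complex<double>> CoarseInner(L / block, 0.0);
  for (int x = 0; x < L; x++)
    CoarseInner[x / block] += mask[x] * std::conj(X[x]) * Y[x];
  for (size_t b = 0; b < CoarseInner.size(); b++)
    std::printf("block %zu : (%g,%g)\n", b, CoarseInner[b].real(), CoarseInner[b].imag());
  return 0;
}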
class Geometry {
public:
int npoint;
@ -80,8 +78,12 @@ public:
}
directions [2*_d]=0;
displacements[2*_d]=0;
}
std::cout <<GridLogMessage << "Geometry "<<std::endl;
for(int p=0;p<npoint;p++){
std::cout <<GridLogMessage << "point " <<p<<" dir "<<directions[p]<<" delta " <<displacements[p]<<std::endl;
}
}
};
template<class Fobj,class CComplex,int nbasis>
@ -102,8 +104,8 @@ public:
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
CoarseGrid(_CoarseGrid),
FineGrid(_FineGrid),
subspace(nbasis,_FineGrid),
checkerboard(_checkerboard)
checkerboard(_checkerboard),
subspace(nbasis,_FineGrid)
{
};
@ -285,6 +287,8 @@ public:
///////////////////////
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
virtual std::vector<int> Directions(void) { return geom.directions; };
virtual std::vector<int> Displacements(void){ return geom.displacements; };
void M (const CoarseVector &in, CoarseVector &out)
{
conformable(_grid,in.Grid());
@ -308,6 +312,9 @@ public:
int osites=Grid()->oSites();
autoView(st,Stencil,AcceleratorRead);
siteVector *CBp=Stencil.CommBuf();
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
@ -318,12 +325,12 @@ public:
for(int point=0;point<geom.npoint;point++){
SE=Stencil.GetEntry(ptype,point,ss);
SE=st.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
nbr = coalescedRead(CBp[SE->_offset]);
}
acceleratorSynchronise();
@ -332,7 +339,7 @@ public:
}
}
coalescedWrite(out_v[ss](b),res);
});
});
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
};
@ -409,38 +416,23 @@ public:
MdirCalc(in,out[p],p);
}
};
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp)
{
this->MdirComms(in);
int ndim = in.Grid()->Nd();
//////////////
// 4D action like wilson
// 0+ => 0
// 0- => 1
// 1+ => 2
// 1- => 3
// etc..
//////////////
// 5D action like DWF
// 1+ => 0
// 1- => 1
// 2+ => 2
// 2- => 3
// etc..
auto point = [dir, disp, ndim](){
if(dir == 0 and disp == 0)
return 8;
else if ( ndim==4 ) {
return (4 * dir + 1 - disp) / 2;
} else {
return (4 * (dir-1) + 1 - disp) / 2;
}
}();
int point=-1;
for(int p=0;p<geom.npoint;p++){
if( (dir==geom.directions[p])&&(disp==geom.displacements[p])) point=p;
}
assert(point!=-1);// Must find
std::cout <<GridLogMessage << "Mdir point "<<point<<" dir "<<dir<<" disp "<<disp <<std::endl;
for(int p=0;p<geom.npoint;p++){
std::cout <<GridLogMessage << "point " <<p<<" dir "<<geom.directions[p]<<" delta " <<geom.displacements[p]<<std::endl;
}
MdirCalc(in,out,point);
};
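The replacement code above no longer derives the stencil point from an arithmetic formula; it scans the Geometry tables for a matching (direction, displacement) pair, which works for both the 4-D Wilson-like and 5-D DWF-like layouts. A minimal standalone sketch of that lookup, with an assumed 4-D table (2*Nd hopping legs plus one diagonal leg, matching the layout the Geometry constructor prints), is:

// Illustrative only: table-driven lookup of a stencil point from (dir,disp).
#include <cassert>
#include <cstdio>
#include <vector>

int find_point(const std::vector<int>& directions,
               const std::vector<int>& displacements,
               int dir, int disp) {
  int point = -1;
  for (size_t p = 0; p < directions.size(); p++)
    if (directions[p] == dir && displacements[p] == disp) point = (int)p;
  assert(point != -1); // must find a matching leg
  return point;
}

int main() {
  const int Nd = 4;
  std::vector<int> directions, displacements;
  for (int d = 0; d < Nd; d++) {                        // 2*Nd hopping legs ...
    directions.push_back(d); displacements.push_back(+1);
    directions.push_back(d); displacements.push_back(-1);
  }
  directions.push_back(0); displacements.push_back(0);  // ... plus the diagonal
  std::printf("dir 2, disp -1 -> point %d\n", find_point(directions, displacements, 2, -1));
  std::printf("dir 0, disp  0 -> point %d\n", find_point(directions, displacements, 0, 0));
  return 0;
}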
void Mdiag(const CoarseVector &in, CoarseVector &out)
@ -456,10 +448,58 @@ public:
geom(CoarseGrid._ndimension),
hermitian(hermitian_),
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
A(geom.npoint,&CoarseGrid)
A(geom.npoint,&CoarseGrid)
{
};
void Test(Aggregation<Fobj,CComplex,nbasis> &_Aggregates,GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop)
{
typedef Lattice<Fobj> FineField;
CoarseVector Cin(_grid);
CoarseVector Cout(_grid);
CoarseVector CFout(_grid);
FineField Fin(FineGrid);
FineField Fout(FineGrid);
std::vector<int> seeds({1,2,3,4,5});
GridParallelRNG RNG(_grid); RNG.SeedFixedIntegers(seeds);
gaussian(RNG,Cin);
_Aggregates.PromoteFromSubspace(Cin,Fin);
_Aggregates.ProjectToSubspace(Cin,Fin);
std::cout << GridLogMessage<< "************ "<<std::endl;
std::cout << GridLogMessage<< " Testing M "<<std::endl;
std::cout << GridLogMessage<< "************ "<<std::endl;
// Coarse operator
this->M(Cin,Cout);
// Fine projected operator
_Aggregates.PromoteFromSubspace(Cin,Fin);
linop.Op(Fin,Fout);
_Aggregates.ProjectToSubspace(CFout,Fout);
CFout = CFout-Cout;
RealD diff = norm2(CFout);
std::cout << GridLogMessage<< " diff "<<diff<<std::endl;
assert(diff<1.0e-5);
std::cout << GridLogMessage<< "************ "<<std::endl;
std::cout << GridLogMessage<< " Testing Mdag "<<std::endl;
std::cout << GridLogMessage<< "************ "<<std::endl;
// Coarse operator
Mdag(Cin,Cout);
// Fine operator
linop.AdjOp(Fin,Fout);
_Aggregates.ProjectToSubspace(CFout,Fout);
CFout = CFout-Cout;
diff = norm2(CFout);
std::cout << GridLogMessage<< " diff "<<diff<<std::endl;
assert(diff<1.0e-5);
}
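The Test routine above checks the Galerkin property of the coarse operator: with R the projection to the subspace and P the promotion from it, the coarse matrix should reproduce R A P on coarse vectors (hence the initial promote/project of Cin, which restricts it to the subspace range). The dense toy below walks the same two routes with small matrices standing in for A and P; all sizes and names are illustrative, not Grid code.

// Illustrative only: two routes to apply a Galerkin coarse operator must agree.
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;
using Mat = std::vector<Vec>;

Vec matvec(const Mat& M, const Vec& v) {
  Vec r(M.size(), 0.0);
  for (size_t i = 0; i < M.size(); i++)
    for (size_t j = 0; j < v.size(); j++) r[i] += M[i][j] * v[j];
  return r;
}

int main() {
  const int n = 6, m = 2;                      // fine and coarse dimensions (toy sizes)
  Mat A(n, Vec(n)), P(n, Vec(m));              // fine operator and prolongator
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; j++) A[i][j] = 1.0 / (1.0 + (i > j ? i - j : j - i)); // symmetric
  for (int i = 0; i < n; i++)
    for (int b = 0; b < m; b++) P[i][b] = (i / (n / m) == b) ? 1.0 : 0.1 * (i + b);

  // Coarse matrix Ac = P^T A P (what CoarsenOperator builds, up to stencil truncation)
  Mat Ac(m, Vec(m, 0.0));
  for (int a = 0; a < m; a++)
    for (int b = 0; b < m; b++)
      for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++) Ac[a][b] += P[i][a] * A[i][j] * P[j][b];

  Vec v = {1.0, -2.0};                         // a coarse test vector

  // Route 1: apply the coarse operator directly
  Vec coarse = matvec(Ac, v);

  // Route 2: promote, apply the fine operator, project (what Test does with linop)
  Vec fine(n, 0.0);
  for (int i = 0; i < n; i++)
    for (int b = 0; b < m; b++) fine[i] += P[i][b] * v[b];
  Vec Af = matvec(A, fine);
  Vec projected(m, 0.0);
  for (int b = 0; b < m; b++)
    for (int i = 0; i < n; i++) projected[b] += P[i][b] * Af[i];

  double diff = 0.0;
  for (int b = 0; b < m; b++) diff += (coarse[b] - projected[b]) * (coarse[b] - projected[b]);
  std::printf("diff = %g (should be ~0)\n", diff);
  return 0;
}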
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
Aggregation<Fobj,CComplex,nbasis> & Subspace)
{
@ -496,8 +536,19 @@ public:
CoarseScalar InnerProd(Grid());
std::cout << GridLogMessage<< "CoarsenMatrix Orthog " << std::endl;
// Orthogonalise the subblocks over the basis
blockOrthogonalise(InnerProd,Subspace.subspace);
std::cout << GridLogMessage<< "CoarsenMatrix Orthog done " << std::endl;
auto OpDirections = linop.Directions();
auto OpDisplacements = linop.Displacements();
std::cout<<" Coarsening an operator with "<< OpDirections.size()<<" terms "<<std::endl;
for(int p=0;p<OpDirections.size();p++) {
assert(OpDirections[p]==geom.directions[p]);
assert(OpDisplacements[p]==geom.displacements[p]);
}
// Compute the matrix elements of linop between this orthonormal
// set of vectors.
@ -533,13 +584,27 @@ public:
evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
oddmask = one-evenmask;
/*
{
phi=Subspace.subspace[0];
linop.OpDirAll(phi,Mphi_p);
for(int p=0;p<geom.npoint-1;p++){
int dir=geom.directions[p];
int disp=geom.displacements[p];
linop.OpDir(phi,Mphi,dir,disp);
Mphi=Mphi-Mphi_p[p];
std::cout << GridLogMessage <<" Direction mapping check " <<norm2(Mphi)<<std::endl;
}
}
*/
assert(self_stencil!=-1);
int lhermitian=hermitian;
for(int i=0;i<nbasis;i++){
phi=Subspace.subspace[i];
// std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
linop.OpDirAll(phi,Mphi_p);
linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
@ -550,7 +615,7 @@ public:
int dir = geom.directions[p];
int disp = geom.displacements[p];
if ( (disp==-1) || (!hermitian ) ) {
if ( (disp==-1) || (!lhermitian ) ) {
////////////////////////////////////////////////////////////////////////
// Pick out contributions coming from this cell and neighbour cell
@ -568,11 +633,23 @@ public:
autoView( A_self , A[self_stencil], AcceleratorWrite);
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
if ( lhermitian && (disp==-1) ) {
for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>*
int dirp = geom.directions[pp];
int dispp = geom.displacements[pp];
if ( (dirp==dir) && (dispp==1) ){
auto sft = conjugate(Cshift(oZProj,dir,1));
autoView( sft_v , sft , AcceleratorWrite);
autoView( A_pp , A[pp], AcceleratorWrite);
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); });
}
}
}
}
}
}
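The new branch above exploits Hermiticity of the fine operator to avoid computing the +1 displacement legs explicitly: the forward link is the conjugate (and, for matrix-valued legs, index-transposed) backward link of the forward neighbour, A_{+mu}(x)_{ij} = conj( A_{-mu}(x+mu)_{ji} ), which is what conjugate(Cshift(oZProj,dir,1)) written into (i,j) implements. A scalar 1-D toy of that relation, assuming a Hermitian nearest-neighbour hopping operator, is:

// Illustrative only: for a Hermitian operator the +mu stencil leg can be recovered
// from the -mu leg of the forward neighbour by conjugation (plus an index transpose
// in the matrix-valued case).
#include <cassert>
#include <complex>
#include <cstdio>
#include <vector>

int main() {
  const int L = 8;
  using Cplx = std::complex<double>;
  std::vector<Cplx> h(L), Aplus(L), Aminus(L);
  for (int x = 0; x < L; x++) h[x] = Cplx(0.3 * x, 0.7 - 0.1 * x);
  for (int x = 0; x < L; x++) {
    Aplus[x]  = h[x];                            // coupling x -> x+1
    Aminus[x] = std::conj(h[(x - 1 + L) % L]);   // coupling x -> x-1 (Hermitian partner)
  }
  // Relation used by CoarsenOperator: A_plus(x) == conj(A_minus(x+1))
  for (int x = 0; x < L; x++) {
    Cplx reconstructed = std::conj(Aminus[(x + 1) % L]);
    assert(std::abs(reconstructed - Aplus[x]) < 1e-14);
  }
  std::printf("forward legs reconstructed from backward legs: OK\n");
  return 0;
}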
std::cout << GridLogMessage<< "CoarsenMatrix Diag "<<std::endl;
///////////////////////////////////////////
// Faster alternate self coupling.. use hermiticity to save 2x
///////////////////////////////////////////
@ -604,31 +681,35 @@ public:
}
}
if(hermitian) {
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
ForceHermitian();
MemoryManager::PrintBytes();
// Auto self test
Test( Subspace,FineGrid,linop);
#if 0
///////////////////////////
// test code worth preserving in if block
///////////////////////////
std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
for(int p=0;p<geom.npoint;p++){
std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
std::cout<<GridLogMessage<< "\n"<<A[p] << std::endl;
}
std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
phi=Subspace.subspace[0];
std::vector<int> bc(FineGrid->_ndimension,0);
blockPick(Grid(),phi,tmp,bc); // Pick out a block
linop.Op(tmp,Mphi); // Apply big dop
blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
std::cout<<GridLogMessage<< iProj <<std::endl;
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif
}
void ForceHermitian(void) {
CoarseMatrix Diff (Grid());
for(int p=0;p<geom.npoint;p++){
int dir = geom.directions[p];
int disp = geom.displacements[p];
if(disp==-1) {
// Find the opposite link
for(int pp=0;pp<geom.npoint;pp++){
int dirp = geom.directions[pp];
int dispp = geom.displacements[pp];
if ( (dirp==dir) && (dispp==1) ){
// Diff = adj(Cshift(A[p],dir,1)) - A[pp];
// std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
A[pp] = adj(Cshift(A[p],dir,1));
}
}
}
}
}
};
NAMESPACE_END(Grid);

View File

@ -52,6 +52,9 @@ public:
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
virtual void HermOp(const Field &in, Field &out)=0;
virtual std::vector<int> Directions(void) =0;
virtual std::vector<int> Displacements(void)=0;
};
@ -76,6 +79,9 @@ class MdagMLinearOperator : public LinearOperatorBase<Field> {
public:
MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {
_Mat.Mdiag(in,out);
@ -111,6 +117,8 @@ class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
Matrix &_Mat;
RealD _shift;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {
@ -151,6 +159,8 @@ template<class Matrix,class Field>
class HermitianLinearOperator : public LinearOperatorBase<Field> {
Matrix &_Mat;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {
@ -182,6 +192,8 @@ template<class Matrix,class Field>
class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
Matrix &_Mat;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {
@ -255,6 +267,8 @@ template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
public:
Matrix &_Mat;
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
virtual void Mpc (const Field &in, Field &out) {
Field tmp(in.Grid());
@ -281,6 +295,8 @@ template<class Matrix,class Field>
protected:
Matrix &_Mat;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
virtual void Mpc (const Field &in, Field &out) {
@ -307,6 +323,8 @@ template<class Matrix,class Field>
protected:
Matrix &_Mat;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
virtual void Mpc (const Field &in, Field &out) {
@ -372,6 +390,8 @@ class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase
{
public:
Matrix& _Mat;
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
virtual void Mpc(const Field& in, Field& out) {
Field tmp(in.Grid());
@ -405,6 +425,8 @@ class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Fi
Matrix &_Mat;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
virtual void Mpc(const Field& in, Field& out) {
Field tmp(in.Grid());
@ -435,6 +457,8 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi
Matrix& _Mat;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
virtual void Mpc(const Field& in, Field& out) {
@ -475,6 +499,8 @@ class SchurStaggeredOperator : public SchurOperatorBase<Field> {
Field tmp;
RealD mass;
public:
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
{
assert( _Mat.isTrivialEE() );

View File

@ -48,6 +48,8 @@ public:
virtual void Mdiag (const Field &in, Field &out)=0;
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
virtual std::vector<int> Directions(void) =0;
virtual std::vector<int> Displacements(void)=0;
};
/////////////////////////////////////////////////////////////////////////////////////////////
@ -73,6 +75,8 @@ public:
virtual void MooeeDag (const Field &in, Field &out)=0;
virtual void MooeeInvDag (const Field &in, Field &out)=0;
virtual std::vector<int> Directions(void) =0;
virtual std::vector<int> Displacements(void)=0;
};
NAMESPACE_END(Grid);
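Adding Directions()/Displacements() as pure virtuals means every concrete operator, and every wrapper as seen throughout this change set, must now expose its stencil geometry. A minimal sketch of that forwarding pattern is below; the class names are hypothetical stand-ins, not Grid types, and only the Directions/Displacements signatures come from the patch.

// Illustrative only: the stencil-geometry forwarding pattern used throughout this change set.
#include <cstdio>
#include <vector>

struct MyFermionOp {                                // hypothetical concrete operator
  std::vector<int> directions   {0,0,1,1,2,2,3,3,0};
  std::vector<int> displacements{1,-1,1,-1,1,-1,1,-1,0};
  std::vector<int> Directions(void)    { return directions; }
  std::vector<int> Displacements(void) { return displacements; }
};

template<class Matrix>
struct MyLinearOperator {                           // hypothetical wrapper, mirrors MdagMLinearOperator
  Matrix &_Mat;
  MyLinearOperator(Matrix &Mat) : _Mat(Mat) {}
  std::vector<int> Directions(void)    { return _Mat.Directions(); }
  std::vector<int> Displacements(void) { return _Mat.Displacements(); }
};

int main() {
  MyFermionOp op;
  MyLinearOperator<MyFermionOp> wrapped(op);
  std::printf("stencil has %zu legs\n", wrapped.Directions().size());
  return 0;
}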

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
NAMESPACE_BEGIN(Grid);
/*
* Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv.
* Script A = SolverMatrix
@ -50,53 +51,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
* Vout = x
*/
// abstract base
template<class Field, class CoarseField>
template<class Field, class CoarseField, class Aggregates>
class TwoLevelFlexiblePcg : public LinearFunction<Field>
{
public:
int verbose;
RealD Tolerance;
Integer MaxIterations;
const int mmax = 5;
GridBase *grid;
GridBase *coarsegrid;
const int mmax = 4;
GridBase *FineGrid;
GridBase *CoarseGrid;
LinearOperatorBase<Field> *_Linop
OperatorFunction<Field> *_Smoother,
LinearFunction<CoarseField> *_CoarseSolver;
// Need something that knows how to get from Coarse to fine and back again
LinearOperatorBase<Field> &_Linop;
LinearFunction<Field> &_Smoother;
LinearFunction<CoarseField> &_CoarseSolver;
Aggregates &_Aggregates;
// more operator functions
TwoLevelFlexiblePcg(RealD tol,
Integer maxit,
LinearOperatorBase<Field> *Linop,
LinearOperatorBase<Field> *SmootherLinop,
OperatorFunction<Field> *Smoother,
OperatorFunction<CoarseField> CoarseLinop
) :
Tolerance(tol),
MaxIterations(maxit),
_Linop(Linop),
_PreconditionerLinop(PrecLinop),
_Preconditioner(Preconditioner)
Integer maxit,
LinearOperatorBase<Field> *Linop,
LinearFunction<Field> *Smoother,
LinearFunction<CoarseField> *CoarseSolver,
Aggregates *AggP
) :
Tolerance(tol),
MaxIterations(maxit),
_Linop(*Linop),
_Smoother(*Smoother),
_CoarseSolver(*CoarseSolver),
_Aggregates(*AggP)
{
CoarseGrid=_Aggregates.CoarseGrid;
FineGrid=_Aggregates.FineGrid;
verbose=0;
};
// The Pcg routine is common to all, but the various matrices differ from derived
// implementation to derived implementation
void operator() (const Field &src, Field &psi){
void operator() (const Field &src, Field &psi){
psi.Checkerboard() = src.Checkerboard();
grid = src.Grid();
RealD f;
RealD rtzp,rtz,a,d,b;
RealD rptzp;
RealD tn;
// RealD rptzp;
// RealD tn;
RealD guess = norm2(psi);
RealD ssq = norm2(src);
RealD rsq = ssq*Tolerance*Tolerance;
@ -104,15 +106,15 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
/////////////////////////////
// Set up history vectors
/////////////////////////////
std::vector<Field> p (mmax,grid);
std::vector<Field> mmp(mmax,grid);
std::vector<Field> p (mmax,FineGrid);
std::vector<Field> mmp(mmax,FineGrid);
std::vector<RealD> pAp(mmax);
Field x (grid); x = psi;
Field z (grid);
Field tmp(grid);
Field r (grid);
Field mu (grid);
Field x (FineGrid); x = psi;
Field z (FineGrid);
Field tmp(FineGrid);
Field r (FineGrid);
Field mu (FineGrid);
//////////////////////////
// x0 = Vstart -- possibly modify guess
@ -121,13 +123,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
Vstart(x,src);
// r0 = b -A x0
HermOp(x,mmp); // Shouldn't this be something else?
_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
//////////////////////////////////
// Compute z = M1 x
//////////////////////////////////
M1(r,z,tmp,mp,SmootherMirs);
M1(r,z);
rtzp =real(innerProduct(r,z));
///////////////////////////////////////
@ -143,7 +145,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
int peri_kp = (k+1) % mmax;
rtz=rtzp;
d= M3(p[peri_k],mp,mmp[peri_k],tmp);
d= M3(p[peri_k],mmp[peri_k]);
a = rtz/d;
// Memorise this
@ -153,13 +155,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
// Compute z = M x
M1(r,z,tmp,mp);
M1(r,z);
rtzp =real(innerProduct(r,z));
M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
p[peri_kp]=p[peri_k];
p[peri_kp]=mu;
// Standard search direction p -> z + b p ; b =
b = (rtzp)/rtz;
@ -181,7 +183,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
// Stopping condition
if ( rn <= rsq ) {
HermOp(x,mmp); // Shouldn't this be something else?
_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
axpy(tmp,-1.0,src,mmp[0]);
RealD psinorm = sqrt(norm2(x));
@ -190,7 +192,8 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
RealD true_residual = tmpnorm/srcnorm;
std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
return k;
return;
}
}
// Non-convergence
@ -199,48 +202,40 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
public:
virtual void M(Field & in,Field & out,Field & tmp) {
}
virtual void M1(Field & in, Field & out) {// the smoother
virtual void M1(Field & in, Field & out)
{// the smoother
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
Field tmp(grid);
Field Min(grid);
Field tmp(FineGrid);
Field Min(FineGrid);
PcgM(in,Min); // Smoother call
CoarseField PleftProj(CoarseGrid);
CoarseField PleftMss_proj(CoarseGrid);
HermOp(Min,out);
_Smoother(in,Min); // Smoother call
_Linop.HermOp(Min,out);
axpy(tmp,-1.0,out,in); // tmp = in - A Min
ProjectToSubspace(tmp,PleftProj);
ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
_Aggregates.ProjectToSubspace(PleftProj,tmp);
_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
axpy(out,1.0,Min,tmp); // Min+tmp
}
virtual void M2(const Field & in, Field & out) {
virtual void M2(const Field & in, Field & out)
{
out=in;
// Must override for Def2 only
// case PcgDef2:
// Pright(in,out);
// break;
}
virtual RealD M3(const Field & p, Field & mmp){
virtual RealD M3(const Field & p, Field & mmp)
{
double d,dd;
HermOpAndNorm(p,mmp,d,dd);
_Linop.HermOpAndNorm(p,mmp,d,dd);
return dd;
// Must override for Def1 only
// case PcgDef1:
// d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
// linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
// Pleft(mp,mmp);
// d=real(linop_d->inner(p,mmp));
}
virtual void VstartDef2(Field & xconst Field & src){
virtual void Vstart(Field & x,const Field & src)
{
//case PcgDef2:
//case PcgAdef2:
//case PcgAdef2f:
@ -256,142 +251,79 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
// = src_s - (A guess)_s - src_s + (A guess)_s
// = 0
///////////////////////////////////
Field r(grid);
Field mmp(grid);
Field r(FineGrid);
Field mmp(FineGrid);
CoarseField PleftProj(CoarseGrid);
CoarseField PleftMss_proj(CoarseGrid);
HermOp(x,mmp);
_Linop.HermOp(x,mmp);
axpy (r, -1.0, mmp, src); // r_{-1} = src - A x
ProjectToSubspace(r,PleftProj);
ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
PromoteFromSubspace(PleftMss_proj,mmp);
_Aggregates.ProjectToSubspace(PleftProj,r);
_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} r_s
_Aggregates.PromoteFromSubspace(PleftMss_proj,mmp);
x=x+mmp;
}
virtual void Vstart(Field & x,const Field & src){
return;
}
/////////////////////////////////////////////////////////////////////
// Only Def1 has non-trivial Vout. Override in Def1
/////////////////////////////////////////////////////////////////////
virtual void Vout (Field & in, Field & out,Field & src){
out = in;
//case PcgDef1:
// //Qb + PT x
// ProjectToSubspace(src,PleftProj);
// ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
// PromoteFromSubspace(PleftMss_proj,tmp);
//
// Pright(in,out);
//
// linop_d->axpy(out,tmp,out,1.0);
// break;
}
////////////////////////////////////////////////////////////////////////////////////////////////
// Pright and Pleft are common to all implementations
////////////////////////////////////////////////////////////////////////////////////////////////
virtual void Pright(Field & in,Field & out){
virtual void Pright(Field & in,Field & out)
{
// P_R = [ 1 0 ]
// [ -Mss^-1 Msb 0 ]
Field in_sbar(grid);
Field in_sbar(FineGrid);
ProjectToSubspace(in,PleftProj);
PromoteFromSubspace(PleftProj,out);
CoarseField PleftProj(CoarseGrid);
CoarseField PleftMss_proj(CoarseGrid);
_Aggregates.ProjectToSubspace(PleftProj,in);
_Aggregates.PromoteFromSubspace(PleftProj,out);
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
HermOp(in_sbar,out);
ProjectToSubspace(out,PleftProj); // Mssbar in_sbar (project)
_Linop.HermOp(in_sbar,out);
_Aggregates.ProjectToSubspace(PleftProj,out); // Mssbar in_sbar (project)
ApplyInverse (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
PromoteFromSubspace(PleftMss_proj,out); //
_CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
_Aggregates.PromoteFromSubspace(PleftMss_proj,out); //
axpy(out,-1.0,out,in_sbar); // in_sbar - Mss^{-1} Mssbar in_sbar
}
virtual void Pleft (Field & in,Field & out){
virtual void Pleft (Field & in,Field & out)
{
// P_L = [ 1 -Mbs Mss^-1]
// [ 0 0 ]
Field in_sbar(grid);
Field tmp2(grid);
Field Mtmp(grid);
Field in_sbar(FineGrid);
Field tmp2(FineGrid);
Field Mtmp(FineGrid);
ProjectToSubspace(in,PleftProj);
PromoteFromSubspace(PleftProj,out);
CoarseField PleftProj(CoarseGrid);
CoarseField PleftMss_proj(CoarseGrid);
_Aggregates.ProjectToSubspace(PleftProj,in);
_Aggregates.PromoteFromSubspace(PleftProj,out);
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
PromoteFromSubspace(PleftMss_proj,out);
_CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} in_s
_Aggregates.PromoteFromSubspace(PleftMss_proj,out);
HermOp(out,Mtmp);
_Linop.HermOp(out,Mtmp);
ProjectToSubspace(Mtmp,PleftProj); // Msbar s Mss^{-1}
PromoteFromSubspace(PleftProj,tmp2);
_Aggregates.ProjectToSubspace(PleftProj,Mtmp); // Msbar s Mss^{-1}
_Aggregates.PromoteFromSubspace(PleftProj,tmp2);
axpy(out,-1.0,tmp2,Mtmp);
axpy(out,-1.0,out,in_sbar); // in_sbar - Msbars Mss^{-1} in_s
}
}
};
NAMESPACE_END(Grid);
template<class Field>
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp){
}
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
}
virtual void M2(Field & in, Field & out){
}
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
}
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
}
}
/*
template<class Field>
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
*/
#endif

View File

@ -60,6 +60,8 @@ public:
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
virtual void operator()(const Field &src,Field &guess) {
RealD t=-usecond();
guess = Zero();
assert(evec.size()==eval.size());
auto N = evec.size();
@ -68,6 +70,8 @@ public:
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
}
guess.Checkerboard() = src.Checkerboard();
t+=usecond();
std::cout<<GridLogMessage<<"\t\t\t" << "Deflated guess took "<< t/1000.0<< "ms" <<std::endl;
}
};
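The guess assembled above is the standard spectral deflation formula, guess = sum_i v_i <v_i,src> / eval_i, exact for the component of src lying in the span of the supplied eigenpairs. A small scalar sketch of the same formula, with a diagonal toy operator so the eigenvectors are the unit basis (all values below are made up for illustration), is:

// Illustrative only: deflated initial guess  guess = sum_i v_i <v_i,src> / eval_i.
#include <cstdio>
#include <vector>

int main() {
  // Toy operator A = diag(2,5,9): eigenvectors are the unit vectors, eigenvalues 2,5,9.
  std::vector<double> eval = {2.0, 5.0, 9.0};
  std::vector<std::vector<double>> evec = {{1,0,0},{0,1,0},{0,0,1}};
  std::vector<double> src = {4.0, 10.0, 27.0};

  std::vector<double> guess(3, 0.0);
  for (size_t i = 0; i < evec.size(); i++) {
    double ip = 0.0;                                  // <v_i, src>
    for (int k = 0; k < 3; k++) ip += evec[i][k] * src[k];
    for (int k = 0; k < 3; k++) guess[k] += ip / eval[i] * evec[i][k];
  }
  // For this toy the guess solves A guess = src exactly: expect (2, 2, 3).
  std::printf("guess = (%g, %g, %g)\n", guess[0], guess[1], guess[2]);
  return 0;
}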

View File

@ -59,7 +59,7 @@ public:
GridBase *grid = src.Grid();
Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);
psi=zero;
psi=Zero();
r = src;
Preconditioner(r,p);

View File

@ -53,7 +53,11 @@ public:
{
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
#ifdef GRID_UVM
_Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
#else
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
#endif
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
return ptr;
}
@ -62,7 +66,11 @@ public:
{
size_type bytes = __n * sizeof(_Tp);
profilerFree(bytes);
#ifdef GRID_UVM
MemoryManager::SharedFree((void *)__p,bytes);
#else
MemoryManager::CpuFree((void *)__p,bytes);
#endif
}
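The allocator now dispatches at compile time: with GRID_UVM defined it uses the shared (unified) pool, otherwise the host pool. The sketch below factors the same #ifdef dispatch into a pair of free helpers so the allocate and deallocate paths cannot diverge; the helper names and the malloc/free stand-ins are hypothetical, only the GRID_UVM switch and the MemoryManager::SharedAllocate/CpuAllocate split come from the patch.

// Illustrative only: compile-time selection of the allocation pool, mirroring GRID_UVM.
#include <cstdio>
#include <cstdlib>

static void *sharedAllocate(size_t bytes) { return std::malloc(bytes); } // stand-in for SharedAllocate
static void *cpuAllocate   (size_t bytes) { return std::malloc(bytes); } // stand-in for CpuAllocate
static void  sharedFree(void *p, size_t)  { std::free(p); }              // stand-in for SharedFree
static void  cpuFree   (void *p, size_t)  { std::free(p); }              // stand-in for CpuFree

static void *poolAllocate(size_t bytes) {
#ifdef GRID_UVM
  return sharedAllocate(bytes);   // unified (host+device visible) memory
#else
  return cpuAllocate(bytes);      // host-side pool
#endif
}
static void poolFree(void *p, size_t bytes) {
#ifdef GRID_UVM
  sharedFree(p, bytes);
#else
  cpuFree(p, bytes);
#endif
}

int main() {
  void *p = poolAllocate(1024);
  std::printf("allocated %p\n", p);
  poolFree(p, 1024);
  return 0;
}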
// FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop

View File

@ -9,11 +9,13 @@ NAMESPACE_BEGIN(Grid);
#define AccSmall (3)
#define Shared (4)
#define SharedSmall (5)
uint64_t total_cache;
uint64_t total_shared;
uint64_t total_device;
uint64_t total_host;
void MemoryManager::PrintBytes(void)
{
std::cout << " MemoryManager : "<<total_cache <<" cache bytes "<<std::endl;
std::cout << " MemoryManager : "<<total_shared<<" shared bytes "<<std::endl;
std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
std::cout << " MemoryManager : "<<total_host <<" cpu bytes "<<std::endl;
@ -35,6 +37,8 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes)
if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocDevice(bytes);
total_device+=bytes;
} else {
// std::cout <<"AcceleratorAllocate: cache hit Device pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
}
return ptr;
}
@ -53,8 +57,10 @@ void *MemoryManager::SharedAllocate(size_t bytes)
if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocShared(bytes);
total_shared+=bytes;
// std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
// std::cout <<"SharedAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
// PrintBytes();
} else {
// std::cout <<"SharedAllocate: cache hit Shared pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
}
return ptr;
}
@ -74,6 +80,9 @@ void *MemoryManager::CpuAllocate(size_t bytes)
if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocShared(bytes);
total_host+=bytes;
// std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl;
} else {
// std::cout <<"CpufAllocate: cache hit Cpu pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
}
return ptr;
}
@ -120,7 +129,7 @@ void MemoryManager::Init(void)
str= getenv("GRID_ALLOC_NCACHE_LARGE");
if ( str ) {
Nc = atoi(str);
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
Ncache[Cpu]=Nc;
Ncache[Acc]=Nc;
Ncache[Shared]=Nc;
@ -130,7 +139,7 @@ void MemoryManager::Init(void)
str= getenv("GRID_ALLOC_NCACHE_SMALL");
if ( str ) {
Nc = atoi(str);
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
Ncache[CpuSmall]=Nc;
Ncache[AccSmall]=Nc;
Ncache[SharedSmall]=Nc;
@ -211,6 +220,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
if ( entries[v].valid ) {
ret = entries[v].address;
total_cache-=entries[v].bytes;
entries[v].valid = 0;
entries[v].address = NULL;
entries[v].bytes = 0;
@ -219,6 +229,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
entries[v].address=ptr;
entries[v].bytes =bytes;
entries[v].valid =1;
total_cache+=entries[v].bytes;
return ret;
}
@ -243,6 +254,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach
for(int e=0;e<ncache;e++){
if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
entries[e].valid = 0;
total_cache-=bytes;
return entries[e].address;
}
}
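Insert() and Lookup() above keep a running byte count (total_cache) alongside the fixed-size cache of freed blocks: bytes are added when a block is parked, and subtracted both when a cached entry is evicted and when one is handed back out. A self-contained toy of that bookkeeping, not the Grid implementation, is:

// Illustrative only: a tiny fixed-size free-block cache with byte accounting,
// mirroring the Insert/Lookup logic in MemoryManager.
#include <cstdio>
#include <cstdlib>

struct Entry { void *address = nullptr; size_t bytes = 0; int valid = 0; };

static const int Ncache = 4;
static Entry cache[Ncache];
static int victim = 0;
static size_t total_cache = 0;

// Park a freed block; returns a block to really free if an entry was evicted.
void *cacheInsert(void *ptr, size_t bytes) {
  void *evicted = nullptr;
  int v = victim; victim = (victim + 1) % Ncache;    // simple round-robin victim choice
  if (cache[v].valid) { evicted = cache[v].address; total_cache -= cache[v].bytes; }
  cache[v].address = ptr; cache[v].bytes = bytes; cache[v].valid = 1;
  total_cache += bytes;
  return evicted;
}

// Reuse a cached block of exactly the requested size, if one exists.
void *cacheLookup(size_t bytes) {
  for (int e = 0; e < Ncache; e++)
    if (cache[e].valid && cache[e].bytes == bytes) {
      cache[e].valid = 0;
      total_cache -= bytes;
      return cache[e].address;
    }
  return nullptr;
}

int main() {
  void *a = std::malloc(256);
  cacheInsert(a, 256);
  std::printf("cached bytes: %zu\n", total_cache);                                  // 256
  void *b = cacheLookup(256);                                                       // hit: returns a
  std::printf("hit %s, cached bytes: %zu\n", (b == a) ? "yes" : "no", total_cache); // 0
  std::free(b);
  return 0;
}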

View File

@ -93,8 +93,8 @@ private:
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
static void PrintBytes(void);
public:
static void PrintBytes(void);
static void Init(void);
static void InitMessage(void);
static void *AcceleratorAllocate(size_t bytes);

View File

@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_local.h>
#include <Grid/lattice/Lattice_reduction.h>
#include <Grid/lattice/Lattice_peekpoke.h>
//#include <Grid/lattice/Lattice_reality.h>
#include <Grid/lattice/Lattice_reality.h>
#include <Grid/lattice/Lattice_real_imag.h>
#include <Grid/lattice/Lattice_comparison_utils.h>
#include <Grid/lattice/Lattice_comparison.h>

View File

@ -52,6 +52,7 @@ public:
// This will be safe to call from accelerator_for and is trivially copy constructible
// The copy constructor for this will need to be used by device lambda functions
/////////////////////////////////////////////////////////////////////////////////////////
#undef LATTICE_BOUNDS_CHECK
template<class vobj>
class LatticeView : public LatticeAccelerator<vobj>
{
@ -61,14 +62,36 @@ public:
void * cpu_ptr;
#ifdef GRID_SIMT
accelerator_inline const typename vobj::scalar_object operator()(size_t i) const {
#ifdef LATTICE_BOUNDS_CHECK
assert(i<this->_odata_size);
assert(i>=0);
#endif
return coalescedRead(this->_odata[i]);
}
#else
accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; }
accelerator_inline const vobj & operator()(size_t i) const {
#ifdef LATTICE_BOUNDS_CHECK
assert(i<this->_odata_size);
assert(i>=0);
#endif
return this->_odata[i];
}
#endif
accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; };
accelerator_inline const vobj & operator[](size_t i) const {
#ifdef LATTICE_BOUNDS_CHECK
assert(i<this->_odata_size);
assert(i>=0);
#endif
return this->_odata[i];
};
accelerator_inline vobj & operator[](size_t i) {
#ifdef LATTICE_BOUNDS_CHECK
assert(i<this->_odata_size);
assert(i>=0);
#endif
return this->_odata[i];
};
accelerator_inline uint64_t begin(void) const { return 0;};
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
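LATTICE_BOUNDS_CHECK is explicitly #undef-ed above, so the asserts compile away by default; switching that line to a #define turns every view access into a range-checked access. The standalone sketch below shows the same guarded-accessor pattern with a toy view class; it is a stand-in for LatticeView, not Grid code, and drops the i>=0 check since the index type is unsigned.

// Illustrative only: operator[] that is range-checked when a compile-time flag is set,
// in the spirit of LATTICE_BOUNDS_CHECK.
#include <cassert>
#include <cstdio>
#include <vector>

#define TOY_BOUNDS_CHECK   // comment out to compile the checks away

template<class T>
struct ToyView {
  T *data; size_t size;
  T &operator[](size_t i) {
#ifdef TOY_BOUNDS_CHECK
    assert(i < size);       // catches out-of-range indexing in debug runs
#endif
    return data[i];
  }
};

int main() {
  std::vector<double> storage(16, 1.0);
  ToyView<double> v{storage.data(), storage.size()};
  double sum = 0.0;
  for (size_t i = 0; i < v.size; i++) sum += v[i];
  std::printf("sum = %g\n", sum);
  return 0;
}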

View File

@ -77,9 +77,16 @@ const int SpinorIndex = 2;
template<typename T> struct isSpinor {
static constexpr bool value = (SpinorIndex==T::TensorLevel);
};
const int CoarseIndex = 4;
template<typename T> struct isCoarsened {
static constexpr bool value = (CoarseIndex<=T::TensorLevel);
};
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
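isCoarsened and the IfCoarsened/IfNotCoarsened aliases let the same function name be overloaded separately for coarse-space tensors (TensorLevel >= 4) and everything else, which is exactly the mechanism the new coarsened spProj5p/spProj5m overloads rely on. A self-contained sketch of that enable_if dispatch, with toy tensor types in place of Grid's, is:

// Illustrative only: overload selection by TensorLevel via enable_if,
// mirroring isCoarsened / IfCoarsened. The tensor types here are toys.
#include <cstdio>
#include <type_traits>

struct SpinorTensor { static constexpr int TensorLevel = 2; };
struct CoarseTensor { static constexpr int TensorLevel = 4; };

constexpr int CoarseIndex = 4;
template<typename T> struct isCoarsened {
  static constexpr bool value = (CoarseIndex <= T::TensorLevel);
};
template<typename T> using IfCoarsened    = typename std::enable_if< isCoarsened<T>::value, int>::type;
template<typename T> using IfNotCoarsened = typename std::enable_if<!isCoarsened<T>::value, int>::type;

template<class T, IfCoarsened<T>    = 0> void describe(const T &) { std::printf("coarsened overload\n"); }
template<class T, IfNotCoarsened<T> = 0> void describe(const T &) { std::printf("fine overload\n"); }

int main() {
  describe(SpinorTensor{}); // picks the fine overload
  describe(CoarseTensor{}); // picks the coarsened overload
  return 0;
}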
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator

View File

@ -89,7 +89,8 @@ public:
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual std::vector<int> Directions(void) =0;
virtual std::vector<int> Displacements(void)=0;
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};

View File

@ -44,6 +44,9 @@ public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
virtual std::vector<int> Directions(void) { return this->directions; };
virtual std::vector<int> Displacements(void){ return this->displacements;};
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }

View File

@ -49,6 +49,9 @@ public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
virtual std::vector<int> Directions(void) { return this->directions; };
virtual std::vector<int> Displacements(void){ return this->displacements;};
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }

View File

@ -47,6 +47,9 @@ public:
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
virtual std::vector<int> Directions(void) { return this->directions; };
virtual std::vector<int> Displacements(void){ return this->displacements;};
////////////////////////////////////////
// Performance monitoring
////////////////////////////////////////

View File

@ -63,6 +63,9 @@ public:
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
virtual std::vector<int> Directions(void) { return this->directions; };
virtual std::vector<int> Displacements(void){ return this->displacements;};
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////

View File

@ -72,6 +72,9 @@ public:
typedef WilsonKernels<Impl> Kernels;
PmuStat stat;
virtual std::vector<int> Directions(void) { return this->directions; };
virtual std::vector<int> Displacements(void){ return this->displacements;};
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }

View File

@ -79,6 +79,8 @@ public:
_Mat.M(in,tmp);
G5R5(out,tmp);
}
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
};
@ -127,6 +129,8 @@ public:
_Mat.M(in,tmp);
out=g5*tmp;
}
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
};
NAMESPACE_END(Grid);

View File

@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
pickCheckerboard(Even, CloverTermEven, CloverTerm);
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
}
template <class Impl>

View File

@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
hspin(0)=fspin(0)-fspin(2);
hspin(1)=fspin(1)-fspin(3);
}
@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
* 0 0 -1 0
* 0 0 0 -1
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
hspin(0)=fspin(0);
hspin(1)=fspin(1);
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
hspin(0)=fspin(2);
hspin(1)=fspin(3);
}
// template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
rfspin(0)=fspin(0);
rfspin(1)=fspin(1);
rfspin(2)=Zero();
rfspin(3)=Zero();
}
// template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
rfspin(0)=Zero();
rfspin(1)=Zero();
rfspin(2)=fspin(2);
rfspin(3)=fspin(3);
}
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
{
const int hN = N>>1;
for(int s=0;s<hN;s++){
rfspin(s)=fspin(s);
rfspin(s+hN)=Zero();
}
}
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
{
const int hN = N>>1;
for(int s=0;s<hN;s++){
rfspin(s)=Zero();
rfspin(s+hN)=fspin(s+hN);
}
}
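For coarsened vectors the two projectors simply split the 2*nbasis components into a first and a second half, so spProj5p(v) + spProj5m(v) reproduces v and each projector is idempotent. A standalone check of those properties on a plain array, assuming an even component count, is:

// Illustrative only: half-space projectors on a 2*nbasis component vector,
// mirroring the coarsened spProj5p / spProj5m above.
#include <cassert>
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;

Vec proj5p(const Vec &v) {                 // keep first half, zero second half
  const size_t hN = v.size() / 2;
  Vec r(v.size(), 0.0);
  for (size_t s = 0; s < hN; s++) r[s] = v[s];
  return r;
}
Vec proj5m(const Vec &v) {                 // zero first half, keep second half
  const size_t hN = v.size() / 2;
  Vec r(v.size(), 0.0);
  for (size_t s = 0; s < hN; s++) r[s + hN] = v[s + hN];
  return r;
}

int main() {
  Vec v = {1, 2, 3, 4, 5, 6, 7, 8};        // N = 2*nbasis = 8 toy components
  Vec p = proj5p(v), m = proj5m(v);
  for (size_t s = 0; s < v.size(); s++) {
    assert(p[s] + m[s] == v[s]);           // completeness: P+ + P- = 1
    assert(proj5p(p)[s] == p[s]);          // idempotence:  P+ P+ = P+
  }
  std::printf("P+ + P- = 1 and P+^2 = P+ hold on the toy vector\n");
  return 0;
}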
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reconstruction routines to move back again to four spin
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=timesMinusI(hspin(1));
@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=timesI(hspin(1));
@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=timesI(hspin(1));
@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=timesI(hspin(1));
@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)= hspin(1);
@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=-hspin(1);
@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=hspin(1);
@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=hspin(1);
@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=timesMinusI(hspin(0));
@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)= timesI(hspin(0));
@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=timesI(hspin(0));
@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=timesI(hspin(0));
@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=hspin(0);
@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=-hspin(0);
@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=hspin(0);
@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=hspin(0);
@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
fspin(2)=Zero();
@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=Zero();
fspin(1)=Zero();
fspin(2)=hspin(0)+hspin(0);
@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0)+hspin(0);
fspin(1)+=hspin(1)+hspin(1);
}
@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
//////////
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjXp(hspin._internal[i],fspin._internal[i]);
}
@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM
}}
}
////////
// Xm
////////
template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjXm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri
template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconXm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconXm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// Yp
////////
template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjYp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri
template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconYp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconYp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// Ym
////////
template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjYm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconYm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconYm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@ -638,66 +594,57 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjZp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconZp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconZp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
@ -706,62 +653,53 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjZm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconZm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconZm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
@ -774,41 +712,35 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjTp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconTp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconTp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// Tm
////////
template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjTm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri
template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconTm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconTm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// 5p
////////
template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProj5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProj5p(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri
template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spRecon5p(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr
template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumRecon5p(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM
}
// four spinor projectors for chiral proj
// template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProj5p(hspin._internal,fspin._internal);
}
// template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProj5p(hspin._internal[i],fspin._internal[i]);
}
}
// template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &
// 5m
////////
template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
spProj5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
for(int i=0;i<N;i++) {
spProj5m(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
@ -1021,40 +924,34 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri
template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spRecon5m(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumRecon5m(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM
// four spinor projectors for chiral proj
// template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProj5m(hspin._internal,fspin._internal);
}
// template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProj5m(hspin._internal[i],fspin._internal[i]);
}
}
// template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
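The IfNotCoarsened<iScalar<vtype> > = 0 parameters added throughout this hunk are SFINAE guards: they remove these generic pass-through overloads from overload resolution when the tensor is a coarsened (nbasis-component) vector, which now gets its own spProj5p/spProj5m meaning. The trait itself is defined elsewhere in the tree; a minimal self-contained sketch of the mechanism, with stand-in types rather than Grid's actual definitions, is:

    #include <type_traits>
    #include <iostream>

    // Stand-in types: iVector mimics a fixed-length tensor, CoarseTag marks a coarse basis element.
    template<class T, int N> struct iVector { T v[N]; };
    struct CoarseTag {};

    // Trait: true only for the "coarsened" shape.
    template<typename T> struct isCoarsened : std::false_type {};
    template<int N> struct isCoarsened<iVector<CoarseTag, N> > : std::true_type {};

    // enable_if alias used as an extra defaulted template parameter, as in the diff above.
    template<typename T>
    using IfNotCoarsened = typename std::enable_if<!isCoarsened<T>::value, int>::type;

    // Participates in overload resolution only for non-coarsened types.
    template<class T, IfNotCoarsened<T> = 0>
    void genericPassThrough(const T &) { std::cout << "generic (fine) path\n"; }

    int main() {
      iVector<double, 4> fine{};
      genericPassThrough(fine);        // OK: not a coarsened type
      // iVector<CoarseTag, 8> coarse{};
      // genericPassThrough(coarse);   // would not compile: overload removed by SFINAE
      return 0;
    }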

View File

@ -154,8 +154,8 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co
accelerator_for(sss,nloop,vobj::Nsimd(),{
uint64_t ss = sss*Ls;
decltype(coalescedRead(y_v[ss+sp])) tmp;
spProj5m(tmp,y_v(ss+sp));
tmp = a*x_v(ss+s)+b*tmp;
spProj5m(tmp,y_v(ss+sp));
tmp = a*x_v(ss+s)+b*tmp;
coalescedWrite(z_v[ss+s],tmp);
});
}
@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
z.Checkerboard() = x.Checkerboard();
conformable(x,z);
int Ls = grid->_rdimensions[0];
Gamma G5(Gamma::Algebra::Gamma5);
autoView( x_v, x, AcceleratorRead);
autoView( z_v, z, AcceleratorWrite);
uint64_t nloop = grid->oSites()/Ls;
@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
uint64_t ss = sss*Ls;
for(int s=0;s<Ls;s++){
int sp = Ls-1-s;
coalescedWrite(z_v[ss+sp],G5*x_v(ss+s));
auto tmp = x_v(ss+s);
decltype(tmp) tmp_p;
decltype(tmp) tmp_m;
spProj5p(tmp_p,tmp);
spProj5m(tmp_m,tmp);
// Use of spProj5m, 5p captures the coarse space too
coalescedWrite(z_v[ss+sp],tmp_p - tmp_m);
}
});
}
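Replacing the explicit Gamma5 multiply by a difference of chiral projections rests on the identity

    P_{\pm} = \tfrac{1}{2}\,(1 \pm \gamma_5), \qquad P_{+}\,\psi - P_{-}\,\psi = \gamma_5\,\psi ,

so for fine four-spinors tmp_p - tmp_m reproduces G5*x_v(ss+s) exactly, while a coarsened vector picks up its chirality sign from the coarse overloads of spProj5p/spProj5m (presumably a sign split across the two chiral halves of the basis), which is what lets this single kernel serve both spaces.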
@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
z.Checkerboard() = x.Checkerboard();
conformable(x, z);
Gamma G5(Gamma::Algebra::Gamma5);
z = G5 * x;
autoView( x_v, x, AcceleratorRead);
autoView( z_v, z, AcceleratorWrite);
uint64_t nloop = grid->oSites();
accelerator_for(ss,nloop,vobj::Nsimd(),{
auto tmp = x_v(ss);
decltype(tmp) tmp_p;
decltype(tmp) tmp_m;
spProj5p(tmp_p,tmp);
spProj5m(tmp_m,tmp);
coalescedWrite(z_v[ss],tmp_p - tmp_m);
});
}
/*
template<class CComplex, int nbasis>
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
{
@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
}
});
}
*/
NAMESPACE_END(Grid);
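The block commented out just above is the earlier explicit coarse-space G5C, made redundant once spProj5p/spProj5m acquired coarsened overloads. The usual convention, assumed here since the loop body is elided in this hunk, is that the coarse gamma5 acts as +1 on the first nbasis/2 basis components and -1 on the second half; a minimal stand-alone sketch of that action:

    #include <array>
    #include <cstddef>

    // Sketch of the assumed coarse gamma5 action: sign flip between the two chiral halves of the basis.
    template<class Complex, std::size_t nbasis>
    std::array<Complex, nbasis> coarseG5(const std::array<Complex, nbasis> &x) {
      std::array<Complex, nbasis> z{};
      for (std::size_t b = 0; b < nbasis; ++b)
        z[b] = (b < nbasis / 2) ? x[b] : -x[b];
      return z;
    }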

View File

@ -151,6 +151,9 @@ inline void *acceleratorAllocShared(size_t bytes)
ptr = (void *) NULL;
printf(" cudaMallocManaged failed for %zu %s \n",bytes,cudaGetErrorString(err));
}
// size_t free,total;
// cudaMemGetInfo(&free,&total);
// std::cout << "Malloc managed "<<bytes<<" "<<free<<"/"<<total<<std::endl;
return ptr;
};
inline void *acceleratorAllocDevice(size_t bytes)
@ -161,6 +164,9 @@ inline void *acceleratorAllocDevice(size_t bytes)
ptr = (void *) NULL;
printf(" cudaMalloc failed for %zu %s \n",bytes,cudaGetErrorString(err));
}
// size_t free,total;
// cudaMemGetInfo(&free,&total);
// std::cout << "Malloc device "<<bytes<<" "<<free<<"/"<<total<<std::endl;
return ptr;
};
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
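The commented-out lines record a quick way to watch device memory pressure at allocation time. A self-contained version of the same reporting, using the plain CUDA runtime API that the comments reference (this mirrors the comments rather than Grid's final code):

    #include <cstdio>
    #include <cstddef>
    #include <cuda_runtime.h>

    // Allocate device memory and report how much of the card is left afterwards.
    void *allocDeviceVerbose(size_t bytes) {
      void *ptr = nullptr;
      cudaError_t err = cudaMalloc(&ptr, bytes);
      if (err != cudaSuccess) {
        std::printf(" cudaMalloc failed for %zu %s \n", bytes, cudaGetErrorString(err));
        return nullptr;
      }
      size_t free_bytes = 0, total_bytes = 0;
      cudaMemGetInfo(&free_bytes, &total_bytes);
      std::printf(" Malloc device %zu : %zu/%zu bytes free/total\n", bytes, free_bytes, total_bytes);
      return ptr;
    }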

View File

@ -191,7 +191,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=LMIN;lat<=LMAX;lat+=LADD){
@ -216,15 +216,16 @@ int main (int argc, char ** argv)
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<ncbytes/time<<"\t\t" << flops/time<<std::endl;
}
}
#if 1
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=LMIN;lat<=LMAX;lat+=LADD){
@ -258,10 +259,11 @@ int main (int argc, char ** argv)
tmult = tmult /Nloop;
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
time = time * 1000; // convert to NS for GB/s
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" <<ncbytes/time<<"\t\t" << flops/time<<std::endl;
}
}
#endif
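The extra column charges the Cshift traffic as well: bytes presumably counts the three SU(3) matrix fields touched by the multiply itself (read x, read the shifted y, write z), and ncbytes adds the read of y plus the write of its shifted copy inside Cshift, hence the factor 5. For an L^4 lattice of double-precision complex 3x3 matrices (16 B per entry):

    \mathrm{bytes} = 3\,L^4 N_c^2 \times 16\,\mathrm{B}, \qquad \mathrm{ncbytes} = 5\,L^4 N_c^2 \times 16\,\mathrm{B};
    L = 32,\ N_c = 3:\ \mathrm{bytes} \approx 0.45\,\mathrm{GB}, \quad \mathrm{ncbytes} \approx 0.75\,\mathrm{GB}.

Since both are divided by the same measured time, the new GB/s (incl Cshift) column is simply 5/3 of the plain GB/s figure.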

View File

@ -29,7 +29,91 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/Grid.h>
using namespace Grid;
;
void MomentumSpacePropagatorTest(RealD mass,RealD M5, LatticePropagator &prop)
{
// what type LatticeComplex
GridBase *_grid = prop.Grid();
typedef LatticeFermion FermionField;
typedef LatticePropagator PropagatorField;
typedef typename FermionField::vector_type vector_type;
typedef typename FermionField::scalar_type ScalComplex;
typedef iSinglet<ScalComplex> Tcomplex;
typedef Lattice<iSinglet<vector_type> > LatComplex;
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
Coordinate latt_size = _grid->_fdimensions;
PropagatorField num (_grid); num = Zero();
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex W(_grid); W= Zero();
LatComplex a(_grid); a= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex denom(_grid); denom= Zero();
LatComplex cosha(_grid);
LatComplex kmu(_grid);
LatComplex Wea(_grid);
LatComplex Wema(_grid);
ScalComplex ci(0.0,1.0);
SpinColourMatrixD identity = ComplexD(1.0);
for(int mu=0;mu<Nd;mu++) {
LatticeCoordinate(kmu,mu);
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
kmu = TwoPiL * kmu;
// kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
sk = sk + sin(kmu) *sin(kmu);
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*identity);
}
W = one - M5 + sk2;
////////////////////////////////////////////
// Cosh alpha -> alpha
////////////////////////////////////////////
cosha = (one + W*W + sk) / (abs(W)*2.0);
// FIXME Need a Lattice acosh
{
autoView(cosha_v,cosha,CpuRead);
autoView(a_v,a,CpuWrite);
for(int idx=0;idx<_grid->lSites();idx++){
Coordinate lcoor(Nd);
Tcomplex cc;
// RealD sgn;
_grid->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(cc,cosha_v,lcoor);
assert((double)real(cc)>=1.0);
assert(fabs((double)imag(cc))<=1.0e-15);
cc = ScalComplex(::acosh(real(cc)),0.0);
pokeLocalSite(cc,a_v,lcoor);
}}
Wea = ( exp( a) * abs(W) );
Wema= ( exp(-a) * abs(W) );
num = num + ( one - Wema ) * mass * identity;
denom= ( Wea - one ) + mass*mass * (one - Wema);
prop = num/denom;
}
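Written out, the propagator this routine assembles (read off term by term from the code, with \bar k_\mu = \sin k_\mu and \hat k^2 = \sum_\mu 4\sin^2(k_\mu/2)) is

    W = 1 - M_5 + \tfrac{1}{2}\,\hat k^2, \qquad
    \cosh a = \frac{1 + W^2 + \bar k^2}{2\,|W|},

    S(k) = \frac{-\,i \sum_\mu \gamma_\mu \bar k_\mu \;+\; m\,\bigl(1 - |W|\,e^{-a}\bigr)}
                {\bigl(|W|\,e^{a} - 1\bigr) \;+\; m^2\,\bigl(1 - |W|\,e^{-a}\bigr)},

the familiar free domain-wall/overlap momentum-space form in the large-Ls limit, here used to probe the (1-m)^2 normalisation in the ratio test below.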
int main (int argc, char ** argv)
{
@ -307,6 +391,17 @@ int main (int argc, char ** argv)
RealD M5 =0.8;
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5);
/////////////////// Test code for (1-m)^2 ///////////////
LatticePropagatorD prop1(&GRID);
LatticePropagatorD prop2(&GRID);
LatticeComplexD ratio(&GRID);
MomentumSpacePropagatorTest(0.0,M5,prop1);
MomentumSpacePropagatorTest(0.3,M5,prop2);
ratio=localNorm2(prop2);
ratio=ratio/localNorm2(prop1);
std::cout << ratio;
/////////////////// Test code for (1-m)^2 factor ///////////////
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
bool fiveD = false; //calculate 4d free propagator

View File

@ -565,8 +565,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " Running Multigrid "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
BiCGSTAB<CoarseVector> CoarseMgridBiCGSTAB(0.01,1000);
BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0,24);
BiCGSTAB<CoarseVector> CoarseMgridBiCGSTAB(0.01,1000,false);
BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0,24,false);
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ZeroGuesser<LatticeFermion> FineZeroGuesser;
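The only functional change here is the third constructor argument to BiCGSTAB. By analogy with Grid's ConjugateGradient this is presumably an ErrorOnNoConverge flag, letting the fixed-iteration fine smoother (tolerance 0.0, 24 iterations) and the loosely converged coarse solve return without asserting; a hedged usage sketch:

    // Assumed signature: BiCGSTAB(RealD Tolerance, Integer MaxIterations, bool ErrorOnNoConverge = true)
    BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0, 24, false);  // run the full 24 iterations, never abort on non-convergence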
@ -590,5 +590,5 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Done "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
Grid_finalize();
}

File diff suppressed because it is too large.