Mirror of https://github.com/paboyle/Grid.git (synced 2025-06-14 05:07:05 +01:00)

Compare commits: 26 commits, 677b4cc5b0 ... feature/hw

Commit SHA1s:
3064c9a6e2, 729882827c, baa668d3ac, 3c82d16ed8, 5c8c0c2d7c, e5a100846c,
a74e2dc12e, 595f512a6e, a6499b22ff, b4e42a59c6, 8c913e0edd, fd3f93d8d3,
e9543cdacd, 98f7b3d298, b7b164ea24, 77124d99d5, e1327e7ea0, 569f78c2cf,
488c79d5a1, dc6b0f20b2, c0badc3e16, 58f6529b55, e3f056dfbb, da0ffa7a79,
fcc7640b9c, 0cbe2859e0
@@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
   Lattice<dotp> fine_inner_msk(fine);

   // Multiply could be fused with innerProduct
-  // Single block sum kernel could do both masks.
   fine_inner = localInnerProduct(fineX,fineY);
   mult(fine_inner_msk, fine_inner,FineMask);
   blockSum(CoarseInner,fine_inner_msk);
 }


 class Geometry {
 public:
   int npoint;

@@ -80,8 +78,12 @@ public:
     }
     directions [2*_d]=0;
     displacements[2*_d]=0;
-  }

+    std::cout <<GridLogMessage << "Geometry "<<std::endl;
+    for(int p=0;p<npoint;p++){
+      std::cout <<GridLogMessage << "point " <<p<<" dir "<<directions[p]<<" delta " <<displacements[p]<<std::endl;
+    }
+  }
 };

 template<class Fobj,class CComplex,int nbasis>

@@ -102,8 +104,8 @@ public:
   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
     CoarseGrid(_CoarseGrid),
     FineGrid(_FineGrid),
-    subspace(nbasis,_FineGrid),
-    checkerboard(_checkerboard)
+    checkerboard(_checkerboard),
+    subspace(nbasis,_FineGrid)
   {
   };


@@ -285,6 +287,8 @@ public:
   ///////////////////////
   GridBase * Grid(void) { return _grid; };   // this is all the linalg routines need to know

+  virtual std::vector<int> Directions(void)   { return geom.directions; };
+  virtual std::vector<int> Displacements(void){ return geom.displacements; };
   void M (const CoarseVector &in, CoarseVector &out)
   {
     conformable(_grid,in.Grid());
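The hunk above (together with the LinearOperator, SparseMatrix and FermionOperator hunks further down) threads the stencil geometry through the operator interface itself, so the coarsening code can verify that the operator it is given enumerates its hopping terms in the same order as the coarse Geometry. A minimal, self-contained sketch of that idea — class and function names here are illustrative, not Grid's — is:

#include <cassert>
#include <vector>

// Illustrative stand-in for an operator base class: operators report their stencil.
struct OperatorGeometry {
  virtual ~OperatorGeometry() = default;
  virtual std::vector<int> Directions()    = 0;  // lattice direction of each hopping term
  virtual std::vector<int> Displacements() = 0;  // +1 / -1 / 0 displacement of each term
};

// A coarsening step can then check the fine operator against its own table,
// as the CoarsenOperator hunk below does against geom.directions/displacements.
void checkGeometry(OperatorGeometry &op,
                   const std::vector<int> &dirs,
                   const std::vector<int> &disps) {
  auto d  = op.Directions();
  auto dd = op.Displacements();
  assert(d.size() == dirs.size() && dd.size() == disps.size());
  for (size_t p = 0; p < d.size(); p++) {
    assert(d[p]  == dirs[p]);
    assert(dd[p] == disps[p]);
  }
}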
@@ -308,6 +312,9 @@ public:

   int osites=Grid()->oSites();

+  autoView(st,Stencil,AcceleratorRead);
+  siteVector *CBp=Stencil.CommBuf();
+
   accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
     int ss = sss/nbasis;
     int b  = sss%nbasis;
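The motivation for hoisting the stencil view and the comm-buffer pointer out of the kernel is that the accelerator_for lambda should capture trivially copyable values (a view and a raw pointer) rather than touching the host-side Stencil object on the device. A generic, self-contained sketch of the same capture pattern, with stand-in types (none of these names are Grid's):

#include <vector>

struct CommBuffer { int *data; };

// Host-side object that owns resources; not safe to use directly inside a
// device/offload lambda.
struct HostStencil {
  std::vector<int> table;
  CommBuffer buf;
  int *commBuf() { return buf.data; }
};

void applyStencil(HostStencil &S, int n, int *out) {
  // Hoist plain pointers out of the object *before* the parallel region, so the
  // lambda captures trivially copyable values by value -- the same reason the
  // hunk above takes autoView(st,...) and CBp = Stencil.CommBuf() before accelerator_for.
  const int *tab = S.table.data();
  int *cbp = S.commBuf();
  auto kernel = [=](int ss) { out[ss] = tab[ss] + cbp[ss]; };
  for (int ss = 0; ss < n; ss++) kernel(ss);  // stand-in for accelerator_for
}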
@@ -318,12 +325,12 @@ public:

   for(int point=0;point<geom.npoint;point++){

-    SE=Stencil.GetEntry(ptype,point,ss);
+    SE=st.GetEntry(ptype,point,ss);

     if(SE->_is_local) {
       nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
     } else {
-      nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
+      nbr = coalescedRead(CBp[SE->_offset]);
     }
     acceleratorSynchronise();


@@ -332,7 +339,7 @@ public:
       }
     }
     coalescedWrite(out_v[ss](b),res);
   });

   for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
 };

@@ -409,38 +416,23 @@ public:
     MdirCalc(in,out[p],p);
   }
 };
-void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
+void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp)
+{
   this->MdirComms(in);

   int ndim = in.Grid()->Nd();

-  //////////////
-  // 4D action like wilson
-  // 0+ => 0
-  // 0- => 1
-  // 1+ => 2
-  // 1- => 3
-  // etc..
-  //////////////
-  // 5D action like DWF
-  // 1+ => 0
-  // 1- => 1
-  // 2+ => 2
-  // 2- => 3
-  // etc..
-  auto point = [dir, disp, ndim](){
-    if(dir == 0 and disp == 0)
-      return 8;
-    else if ( ndim==4 ) {
-      return (4 * dir + 1 - disp) / 2;
-    } else {
-      return (4 * (dir-1) + 1 - disp) / 2;
-    }
-  }();
+  int point=-1;
+  for(int p=0;p<geom.npoint;p++){
+    if( (dir==geom.directions[p])&&(disp==geom.displacements[p])) point=p;
+  }
+  assert(point!=-1);// Must find

+  std::cout <<GridLogMessage << "Mdir point "<<point<<" dir "<<dir<<" disp "<<disp <<std::endl;
+  for(int p=0;p<geom.npoint;p++){
+    std::cout <<GridLogMessage << "point " <<p<<" dir "<<geom.directions[p]<<" delta " <<geom.displacements[p]<<std::endl;
+  }
   MdirCalc(in,out,point);

 };

 void Mdiag(const CoarseVector &in, CoarseVector &out)
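The hunk above replaces a hard-coded (dir, disp) to stencil-point formula with a lookup against the Geometry tables, and asserts if no matching leg exists, so Mdir no longer depends on a particular point ordering. A self-contained sketch of the lookup (the function name is illustrative; the tables are whatever the Geometry constructor produced):

#include <cassert>
#include <vector>

int findStencilPoint(const std::vector<int> &directions,
                     const std::vector<int> &displacements,
                     int dir, int disp) {
  int point = -1;
  for (size_t p = 0; p < directions.size(); p++)
    if (directions[p] == dir && displacements[p] == disp) point = (int)p;
  assert(point != -1);  // must find a matching stencil leg
  return point;
}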
@@ -456,10 +448,58 @@ public:
     geom(CoarseGrid._ndimension),
     hermitian(hermitian_),
     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
     A(geom.npoint,&CoarseGrid)
   {
   };

+  void Test(Aggregation<Fobj,CComplex,nbasis> &_Aggregates,GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop)
+  {
+    typedef Lattice<Fobj> FineField;
+    CoarseVector Cin(_grid);
+    CoarseVector Cout(_grid);
+    CoarseVector CFout(_grid);
+
+    FineField Fin(FineGrid);
+    FineField Fout(FineGrid);
+
+    std::vector<int> seeds({1,2,3,4,5});
+    GridParallelRNG RNG(_grid); RNG.SeedFixedIntegers(seeds);
+    gaussian(RNG,Cin);
+
+    _Aggregates.PromoteFromSubspace(Cin,Fin);
+    _Aggregates.ProjectToSubspace(Cin,Fin);
+
+    std::cout << GridLogMessage<< "************ "<<std::endl;
+    std::cout << GridLogMessage<< " Testing M "<<std::endl;
+    std::cout << GridLogMessage<< "************ "<<std::endl;
+    // Coarse operator
+    this->M(Cin,Cout);
+    // Fine projected operator
+    _Aggregates.PromoteFromSubspace(Cin,Fin);
+    linop.Op(Fin,Fout);
+    _Aggregates.ProjectToSubspace(CFout,Fout);
+
+    CFout = CFout-Cout;
+    RealD diff = norm2(CFout);
+    std::cout << GridLogMessage<< " diff "<<diff<<std::endl;
+    assert(diff<1.0e-5);
+
+    std::cout << GridLogMessage<< "************ "<<std::endl;
+    std::cout << GridLogMessage<< " Testing Mdag "<<std::endl;
+    std::cout << GridLogMessage<< "************ "<<std::endl;
+    // Coarse operator
+    Mdag(Cin,Cout);
+    // Fine operator
+    linop.AdjOp(Fin,Fout);
+    _Aggregates.ProjectToSubspace(CFout,Fout);
+
+    CFout = CFout-Cout;
+    diff = norm2(CFout);
+    std::cout << GridLogMessage<< " diff "<<diff<<std::endl;
+    assert(diff<1.0e-5);
+  }
+
 void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
                      Aggregation<Fobj,CComplex,nbasis> & Subspace)
 {
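Read as a worked check: with P the prolongator built from the aggregation subspace (PromoteFromSubspace) and P^dag the restrictor (ProjectToSubspace), a Galerkin coarse operator should satisfy, for a random coarse vector c,

    M_c c      ~  P^dag D (P c)
    M_c^dag c  ~  P^dag D^dag (P c)

and the new Test() asserts that norm2 of the difference is below 1.0e-5 for both. The initial Promote/Project round trip makes c exactly representable in the subspace before the comparison, so the residual measures only the coarsening error, not the projection of the random start vector.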
@@ -496,8 +536,19 @@ public:

   CoarseScalar InnerProd(Grid());

+  std::cout << GridLogMessage<< "CoarsenMatrix Orthog " << std::endl;
   // Orthogonalise the subblocks over the basis
   blockOrthogonalise(InnerProd,Subspace.subspace);
+  std::cout << GridLogMessage<< "CoarsenMatrix Orthog done " << std::endl;
+
+  auto OpDirections    = linop.Directions();
+  auto OpDisplacements = linop.Displacements();
+
+  std::cout<<" Coarsening an operator with "<< OpDirections.size()<<" terms "<<std::endl;
+  for(int p=0;p<OpDirections.size();p++) {
+    assert(OpDirections[p]==geom.directions[p]);
+    assert(OpDisplacements[p]==geom.displacements[p]);
+  }

   // Compute the matrix elements of linop between this orthonormal
   // set of vectors.

@@ -533,13 +584,27 @@ public:
   evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
   oddmask  = one-evenmask;

+  /*
+  {
+    phi=Subspace.subspace[0];
+    linop.OpDirAll(phi,Mphi_p);
+    for(int p=0;p<geom.npoint-1;p++){
+      int dir=geom.directions[p];
+      int disp=geom.displacements[p];
+      linop.OpDir(phi,Mphi,dir,disp);
+      Mphi=Mphi-Mphi_p[p];
+      std::cout << GridLogMessage <<" Direction mapping check " <<norm2(Mphi)<<std::endl;
+    }
+  }
+  */
   assert(self_stencil!=-1);
+  int lhermitian=hermitian;

   for(int i=0;i<nbasis;i++){

     phi=Subspace.subspace[i];

-    // std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
+    std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
     linop.OpDirAll(phi,Mphi_p);
     linop.OpDiag  (phi,Mphi_p[geom.npoint-1]);


@@ -550,7 +615,7 @@ public:
   int dir  = geom.directions[p];
   int disp = geom.displacements[p];

-  if ( (disp==-1) || (!hermitian ) ) {
+  if ( (disp==-1) || (!lhermitian ) ) {

   ////////////////////////////////////////////////////////////////////////
   // Pick out contributions coming from this cell and neighbour cell

@@ -568,11 +633,23 @@ public:
   autoView( A_self  , A[self_stencil], AcceleratorWrite);

   accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
+  if ( lhermitian && (disp==-1) ) {
+    for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>*
+      int dirp  = geom.directions[pp];
+      int dispp = geom.displacements[pp];
+      if ( (dirp==dir) && (dispp==1) ){
+        auto sft = conjugate(Cshift(oZProj,dir,1));
+        autoView( sft_v , sft , AcceleratorWrite);
+        autoView( A_pp  , A[pp], AcceleratorWrite);
+        accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); });
+      }
+    }
+  }
   }
   }
   }

+  std::cout << GridLogMessage<< "CoarsenMatrix Diag "<<std::endl;
   ///////////////////////////////////////////
   // Faster alternate self coupling.. use hermiticity to save 2x
   ///////////////////////////////////////////
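When the fine operator is declared Hermitian, the hunk above computes the matrix elements only for the disp = -1 leg of each direction and fills the opposite (+1) leg from them as it goes. The relation being used — previously applied after the fact by ForceHermitian(), which the next hunk deletes — is, per coarse site x and basis indices i, j:

    A_{+mu}(x)_{i j} = conj( A_{-mu}(x + mu)_{j i} )

i.e. the projected element shifted by one coarse site in direction mu, complex conjugated, with the (i, j) indices transposed. That is exactly the conjugate(Cshift(oZProj,dir,1)) written into A[pp][ss](i,j) above.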
@@ -604,31 +681,35 @@ public:

     }
   }
-  if(hermitian) {
-    std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
-    ForceHermitian();
+  MemoryManager::PrintBytes();
+  // Auto self test
+  Test( Subspace,FineGrid,linop);
+
+#if 0
+  ///////////////////////////
+  // test code worth preserving in if block
+  ///////////////////////////
+  std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
+  for(int p=0;p<geom.npoint;p++){
+    std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
+    std::cout<<GridLogMessage<< "\n"<<A[p] << std::endl;
   }
+  std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
+
+  phi=Subspace.subspace[0];
+  std::vector<int> bc(FineGrid->_ndimension,0);
+  blockPick(Grid(),phi,tmp,bc);                 // Pick out a block
+  linop.Op(tmp,Mphi);                           // Apply big dop
+  blockProject(iProj,Mphi,Subspace.subspace);   // project it and print it
+  std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
+  std::cout<<GridLogMessage<< iProj <<std::endl;
+  std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
+#endif

 }

-void ForceHermitian(void) {
-  CoarseMatrix Diff  (Grid());
-  for(int p=0;p<geom.npoint;p++){
-    int dir   = geom.directions[p];
-    int disp  = geom.displacements[p];
-    if(disp==-1) {
-      // Find the opposite link
-      for(int pp=0;pp<geom.npoint;pp++){
-        int dirp   = geom.directions[pp];
-        int dispp  = geom.displacements[pp];
-        if ( (dirp==dir) && (dispp==1) ){
-          // Diff = adj(Cshift(A[p],dir,1)) - A[pp];
-          // std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
-          A[pp] = adj(Cshift(A[p],dir,1));
-        }
-      }
-    }
-  }
-}
 };

 NAMESPACE_END(Grid);
@@ -52,6 +52,9 @@ public:
   virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
   virtual void HermOp(const Field &in, Field &out)=0;

+  virtual std::vector<int> Directions(void)   =0;
+  virtual std::vector<int> Displacements(void)=0;
 };


@@ -76,6 +79,9 @@ class MdagMLinearOperator : public LinearOperatorBase<Field> {
 public:
   MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};

+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
+
   // Support for coarsening to a multigrid
   void OpDiag (const Field &in, Field &out) {
     _Mat.Mdiag(in,out);

@@ -111,6 +117,8 @@ class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
   RealD _shift;
 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
   // Support for coarsening to a multigrid
   void OpDiag (const Field &in, Field &out) {

@@ -151,6 +159,8 @@ template<class Matrix,class Field>
 class HermitianLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
   // Support for coarsening to a multigrid
   void OpDiag (const Field &in, Field &out) {

@@ -182,6 +192,8 @@ template<class Matrix,class Field>
 class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
   // Support for coarsening to a multigrid
   void OpDiag (const Field &in, Field &out) {

@@ -255,6 +267,8 @@ template<class Matrix,class Field>
 class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
 public:
   Matrix &_Mat;
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
   virtual void Mpc (const Field &in, Field &out) {
     Field tmp(in.Grid());

@@ -281,6 +295,8 @@ template<class Matrix,class Field>
 protected:
   Matrix &_Mat;
 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};

   virtual void Mpc (const Field &in, Field &out) {

@@ -307,6 +323,8 @@ template<class Matrix,class Field>
 protected:
   Matrix &_Mat;
 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};

   virtual void Mpc (const Field &in, Field &out) {

@@ -372,6 +390,8 @@ class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase
 {
 public:
   Matrix& _Mat;
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
   virtual void Mpc(const Field& in, Field& out) {
     Field tmp(in.Grid());

@@ -405,6 +425,8 @@ class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Fi
   Matrix &_Mat;

 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
   virtual void Mpc(const Field& in, Field& out) {
     Field tmp(in.Grid());

@@ -435,6 +457,8 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi
   Matrix& _Mat;

 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};

   virtual void Mpc(const Field& in, Field& out) {

@@ -475,6 +499,8 @@ class SchurStaggeredOperator : public SchurOperatorBase<Field> {
   Field tmp;
   RealD mass;
 public:
+  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
+  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
   SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
   {
     assert( _Mat.isTrivialEE() );
@@ -48,6 +48,8 @@ public:
   virtual  void Mdiag    (const Field &in, Field &out)=0;
   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0;
   virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0;
+  virtual std::vector<int> Directions(void)   =0;
+  virtual std::vector<int> Displacements(void)=0;
 };

 /////////////////////////////////////////////////////////////////////////////////////////////

@@ -73,6 +75,8 @@ public:
   virtual  void MooeeDag    (const Field &in, Field &out)=0;
   virtual  void MooeeInvDag (const Field &in, Field &out)=0;

+  virtual std::vector<int> Directions(void)   =0;
+  virtual std::vector<int> Displacements(void)=0;
 };

 NAMESPACE_END(Grid);
@@ -1,4 +1,4 @@
 /*************************************************************************************

     Grid physics library, www.github.com/paboyle/Grid


@@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 #define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG

+NAMESPACE_BEGIN(Grid);
 /*
  * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv.
  * Script A = SolverMatrix

@@ -50,53 +51,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
  * Vout = x
  */

-// abstract base
-template<class Field, class CoarseField>
+template<class Field, class CoarseField, class Aggregates>
 class TwoLevelFlexiblePcg : public LinearFunction<Field>
 {
 public:

   int verbose;

   RealD   Tolerance;
   Integer MaxIterations;
-  const int mmax = 5;
-  GridBase *grid;
-  GridBase *coarsegrid;
+  const int mmax = 4;
+  GridBase *FineGrid;
+  GridBase *CoarseGrid;

-  LinearOperatorBase<Field>   *_Linop
-  OperatorFunction<Field>     *_Smoother,
-  LinearFunction<CoarseField> *_CoarseSolver;
-  // Need somthing that knows how to get from Coarse to fine and back again
+  LinearOperatorBase<Field>   &_Linop;
+  LinearFunction<Field>       &_Smoother;
+  LinearFunction<CoarseField> &_CoarseSolver;
+  Aggregates                  &_Aggregates;

   // more most opertor functions
   TwoLevelFlexiblePcg(RealD tol,
                       Integer maxit,
                       LinearOperatorBase<Field> *Linop,
-                      LinearOperatorBase<Field> *SmootherLinop,
-                      OperatorFunction<Field> *Smoother,
-                      OperatorFunction<CoarseField> CoarseLinop
+                      LinearFunction<Field> *Smoother,
+                      LinearFunction<CoarseField> *CoarseSolver,
+                      Aggregates *AggP
                       ) :
     Tolerance(tol),
     MaxIterations(maxit),
-    _Linop(Linop),
-    _PreconditionerLinop(PrecLinop),
-    _Preconditioner(Preconditioner)
+    _Linop(*Linop),
+    _Smoother(*Smoother),
+    _CoarseSolver(*CoarseSolver),
+    _Aggregates(*AggP)
   {
+    CoarseGrid=_Aggregates.CoarseGrid;
+    FineGrid=_Aggregates.FineGrid;
     verbose=0;
   };

   // The Pcg routine is common to all, but the various matrices differ from derived
   // implementation to derived implmentation
-  void operator() (const Field &src, Field &psi){
   void operator() (const Field &src, Field &psi){

     psi.Checkerboard() = src.Checkerboard();
-    grid = src.Grid();

-    RealD f;
     RealD rtzp,rtz,a,d,b;
-    RealD rptzp;
-    RealD tn;
+    //  RealD rptzp;
+    //  RealD tn;
     RealD guess = norm2(psi);
     RealD ssq = norm2(src);
     RealD rsq = ssq*Tolerance*Tolerance;
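After this rework the solver no longer carries raw (and previously never-initialised) pointer members: it stores references to the fine operator, the smoother, the coarse solver and the aggregation object, and takes its grids from the aggregates. A hedged usage sketch — the concrete operator, smoother and aggregate variable names are illustrative, only the constructor argument order follows the new signature above:

// Sketch only: assumes suitable FineField/CoarseField/Aggregates types and that
// Smoother and CoarseSolver are LinearFunction objects.
TwoLevelFlexiblePcg<FineField,CoarseField,Aggregates>
    pcg(1.0e-8,          // Tolerance
        10000,           // MaxIterations
        &FineHermOp,     // LinearOperatorBase<FineField>
        &Smoother,       // LinearFunction<FineField>
        &CoarseSolver,   // LinearFunction<CoarseField>
        &Aggs);          // provides CoarseGrid/FineGrid and Project/Promote
pcg(src,psi);            // LinearFunction interface: solve for psi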
@@ -104,15 +106,15 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
   /////////////////////////////
   // Set up history vectors
   /////////////////////////////
-  std::vector<Field> p  (mmax,grid);
-  std::vector<Field> mmp(mmax,grid);
+  std::vector<Field> p  (mmax,FineGrid);
+  std::vector<Field> mmp(mmax,FineGrid);
   std::vector<RealD> pAp(mmax);

-  Field x  (grid); x = psi;
-  Field z  (grid);
-  Field tmp(grid);
-  Field r  (grid);
-  Field mu (grid);
+  Field x  (FineGrid); x = psi;
+  Field z  (FineGrid);
+  Field tmp(FineGrid);
+  Field r  (FineGrid);
+  Field mu (FineGrid);

   //////////////////////////
   // x0 = Vstart -- possibly modify guess

@@ -121,13 +123,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
   Vstart(x,src);

   // r0 = b -A x0
-  HermOp(x,mmp); // Shouldn't this be something else?
+  _Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
   axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0

   //////////////////////////////////
   // Compute z = M1 x
   //////////////////////////////////
-  M1(r,z,tmp,mp,SmootherMirs);
+  M1(r,z);
   rtzp =real(innerProduct(r,z));

   ///////////////////////////////////////

@@ -143,7 +145,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
   int peri_kp = (k+1) % mmax;

   rtz=rtzp;
-  d= M3(p[peri_k],mp,mmp[peri_k],tmp);
+  d= M3(p[peri_k],mmp[peri_k]);
   a = rtz/d;

   // Memorise this

@@ -153,13 +155,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
   RealD rn = axpy_norm(r,-a,mmp[peri_k],r);

   // Compute z = M x
-  M1(r,z,tmp,mp);
+  M1(r,z);

   rtzp =real(innerProduct(r,z));

   M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate

-  p[peri_kp]=p[peri_k];
+  p[peri_kp]=mu;

   // Standard search direction  p -> z + b p    ; b =
   b = (rtzp)/rtz;

@@ -181,7 +183,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
   // Stopping condition
   if ( rn <= rsq ) {

-    HermOp(x,mmp); // Shouldn't this be something else?
+    _Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
     axpy(tmp,-1.0,src,mmp[0]);

     RealD  psinorm = sqrt(norm2(x));

@@ -190,7 +192,8 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
     RealD true_residual = tmpnorm/srcnorm;
     std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
     std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
-    return k;
+    return;
   }
 }
 // Non-convergence
@@ -199,48 +202,40 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>

 public:

-  virtual void M(Field & in,Field & out,Field & tmp) {
-  }
-
-  virtual void M1(Field & in, Field & out) {// the smoother
+  virtual void M1(Field & in, Field & out)
+  {// the smoother

     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
-    Field tmp(grid);
-    Field Min(grid);
+    Field tmp(FineGrid);
+    Field Min(FineGrid);

-    PcgM(in,Min); // Smoother call
+    CoarseField PleftProj(CoarseGrid);
+    CoarseField PleftMss_proj(CoarseGrid);

-    HermOp(Min,out);
+    _Smoother(in,Min); // Smoother call
+
+    _Linop.HermOp(Min,out);
     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min

-    ProjectToSubspace(tmp,PleftProj);
-    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
-    PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
+    _Aggregates.ProjectToSubspace(PleftProj,tmp);
+    _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
+    _Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
     axpy(out,1.0,Min,tmp); // Min+tmp
   }

-  virtual void M2(const Field & in, Field & out) {
+  virtual void M2(const Field & in, Field & out)
+  {
     out=in;
-    // Must override for Def2 only
-    //  case PcgDef2:
-    //    Pright(in,out);
-    //    break;
   }

-  virtual RealD M3(const Field & p, Field & mmp){
+  virtual RealD M3(const Field & p, Field & mmp)
+  {
     double d,dd;
-    HermOpAndNorm(p,mmp,d,dd);
+    _Linop.HermOpAndNorm(p,mmp,d,dd);
     return dd;
-    // Must override for Def1 only
-    //  case PcgDef1:
-    //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
-    //    linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
-    //    Pleft(mp,mmp);
-    //    d=real(linop_d->inner(p,mmp));
   }

-  virtual void VstartDef2(Field & xconst Field & src){
+  virtual void Vstart(Field & x,const Field & src)
+  {
     //case PcgDef2:
     //case PcgAdef2:
     //case PcgAdef2f:
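In the reworked M1 above, the two-level (ADEF-2 style) preconditioner application is, with M the smoother, A the fine operator, P^dag / P the subspace restrict / prolong, and A_ss the coarse operator inverted by _CoarseSolver:

    out = M in + P A_ss^{-1} P^dag ( in - A M in )

which is exactly the sequence in the code: _Smoother (Min = M in), _Linop.HermOp plus axpy (tmp = in - A Min), ProjectToSubspace, _CoarseSolver, PromoteFromSubspace (tmp = Q[in - A Min]) and the final axpy (out = Min + tmp). M2 is the identity and M3 supplies pAp via HermOpAndNorm, matching the comments already present in the hunk.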
@@ -256,142 +251,79 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
     //   = src_s - (A guess)_s - src_s  + (A guess)_s
     //   = 0
     ///////////////////////////////////
-    Field r(grid);
-    Field mmp(grid);
+    Field r(FineGrid);
+    Field mmp(FineGrid);

+    CoarseField PleftProj(CoarseGrid);
+    CoarseField PleftMss_proj(CoarseGrid);

-    HermOp(x,mmp);
+    _Linop.HermOp(x,mmp);
     axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x
-    ProjectToSubspace(r,PleftProj);
-    ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
-    PromoteFromSubspace(PleftMss_proj,mmp);
+    _Aggregates.ProjectToSubspace(PleftProj,r);
+    _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} r_s
+    _Aggregates.PromoteFromSubspace(PleftMss_proj,mmp);
     x=x+mmp;

   }

-  virtual void Vstart(Field & x,const Field & src){
-    return;
-  }
-
   /////////////////////////////////////////////////////////////////////
   // Only Def1 has non-trivial Vout. Override in Def1
   /////////////////////////////////////////////////////////////////////
   virtual void Vout  (Field & in, Field & out,Field & src){
     out = in;
-    //case PcgDef1:
-    //  //Qb + PT x
-    //  ProjectToSubspace(src,PleftProj);
-    //  ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
-    //  PromoteFromSubspace(PleftMss_proj,tmp);
-    //
-    //  Pright(in,out);
-    //
-    //  linop_d->axpy(out,tmp,out,1.0);
-    //  break;
   }

   ////////////////////////////////////////////////////////////////////////////////////////////////
   // Pright and Pleft are common to all implementations
   ////////////////////////////////////////////////////////////////////////////////////////////////
-  virtual void Pright(Field & in,Field & out){
+  virtual void Pright(Field & in,Field & out)
+  {
     // P_R  = [ 1              0 ]
     //        [ -Mss^-1 Msb    0 ]
-    Field in_sbar(grid);
+    Field in_sbar(FineGrid);

-    ProjectToSubspace(in,PleftProj);
-    PromoteFromSubspace(PleftProj,out);
+    CoarseField PleftProj(CoarseGrid);
+    CoarseField PleftMss_proj(CoarseGrid);

+    _Aggregates.ProjectToSubspace(PleftProj,in);
+    _Aggregates.PromoteFromSubspace(PleftProj,out);
     axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s

-    HermOp(in_sbar,out);
-    ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar (project)
+    _Linop.HermOp(in_sbar,out);
+    _Aggregates.ProjectToSubspace(PleftProj,out);  // Mssbar in_sbar (project)

-    ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
-    PromoteFromSubspace(PleftMss_proj,out);     //
+    _CoarseSolver(PleftProj,PleftMss_proj);             // Mss^{-1} Mssbar
+    _Aggregates.PromoteFromSubspace(PleftMss_proj,out); //

     axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar
   }
-  virtual void Pleft (Field & in,Field & out){
+  virtual void Pleft (Field & in,Field & out)
+  {
     // P_L  = [ 1  -Mbs Mss^-1]
     //        [ 0   0         ]
-    Field in_sbar(grid);
-    Field    tmp2(grid);
-    Field    Mtmp(grid);
+    Field in_sbar(FineGrid);
+    Field    tmp2(FineGrid);
+    Field    Mtmp(FineGrid);

-    ProjectToSubspace(in,PleftProj);
-    PromoteFromSubspace(PleftProj,out);
+    CoarseField PleftProj(CoarseGrid);
+    CoarseField PleftMss_proj(CoarseGrid);

+    _Aggregates.ProjectToSubspace(PleftProj,in);
+    _Aggregates.PromoteFromSubspace(PleftProj,out);
     axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s

-    ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
-    PromoteFromSubspace(PleftMss_proj,out);
+    _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} in_s
+    _Aggregates.PromoteFromSubspace(PleftMss_proj,out);

-    HermOp(out,Mtmp);
+    _Linop.HermOp(out,Mtmp);

-    ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1}
-    PromoteFromSubspace(PleftProj,tmp2);
+    _Aggregates.ProjectToSubspace(PleftProj,Mtmp);  // Msbar s Mss^{-1}
+    _Aggregates.PromoteFromSubspace(PleftProj,tmp2);

     axpy(out,-1.0,tmp2,Mtmp);
     axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s
   }
-}
+};
+
+NAMESPACE_END(Grid);

-template<class Field>
-class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
- public:
-  virtual void M(Field & in,Field & out,Field & tmp){
-  }
-  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
-  }
-  virtual void M2(Field & in, Field & out){
-  }
-  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
-  }
-  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
-  }
-}
-/*
-template<class Field>
-class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
- public:
-  virtual void M(Field & in,Field & out,Field & tmp);
-  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
-  virtual void M2(Field & in, Field & out);
-  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
-  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
-}
-
-template<class Field>
-class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
- public:
-  virtual void M(Field & in,Field & out,Field & tmp);
-  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
-  virtual void M2(Field & in, Field & out);
-  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
-  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
-  virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
-}
-
-template<class Field>
-class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
- public:
-  virtual void M(Field & in,Field & out,Field & tmp);
-  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
-  virtual void M2(Field & in, Field & out);
-  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
-  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
-}
-
-template<class Field>
-class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
- public:
-  virtual void M(Field & in,Field & out,Field & tmp);
-  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
-  virtual void M2(Field & in, Field & out);
-  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
-  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
-}
-*/
 #endif
@@ -60,6 +60,8 @@ public:
   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};

   virtual void operator()(const Field &src,Field &guess) {
+    RealD t=-usecond();
+
     guess = Zero();
     assert(evec.size()==eval.size());
     auto N = evec.size();

@@ -68,6 +70,8 @@ public:
     axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
   }
   guess.Checkerboard() = src.Checkerboard();
+  t+=usecond();
+  std::cout<<GridLogMessage<<"\t\t\t" << "Deflated guess took "<< t/1000.0<< "ms" <<std::endl;
 }
 };

@@ -59,7 +59,7 @@ public:
   GridBase *grid = src.Grid();
   Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);

-  psi=zero;
+  psi=Zero();
   r = src;
   Preconditioner(r,p);

@@ -53,7 +53,11 @@ public:
   {
     size_type bytes = __n*sizeof(_Tp);
     profilerAllocate(bytes);
+#ifdef GRID_UVM
+    _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
+#else
     _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
+#endif
     assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
     return ptr;
   }

@@ -62,7 +66,11 @@ public:
   {
     size_type bytes = __n * sizeof(_Tp);
     profilerFree(bytes);
+#ifdef GRID_UVM
+    MemoryManager::SharedFree((void *)__p,bytes);
+#else
     MemoryManager::CpuFree((void *)__p,bytes);
+#endif
   }

   // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
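These two hunks give the allocator a compile-time GRID_UVM switch: unified (shared) memory when it is defined, plain host memory otherwise, with matching allocate/free paths. A self-contained sketch of the same pattern — the allocator functions here are malloc stand-ins, not Grid's MemoryManager:

#include <cassert>
#include <cstdlib>

// Stand-ins for SharedAllocate / CpuAllocate; only the compile-time switch matters.
static void *shared_alloc(size_t n) { return std::malloc(n); }
static void *host_alloc(size_t n)   { return std::malloc(n); }

void *allocate(size_t bytes) {
#ifdef GRID_UVM
  // unified/shared memory: pointer usable on host and device
  void *ptr = shared_alloc(bytes);
#else
  // plain host memory
  void *ptr = host_alloc(bytes);
#endif
  assert(ptr != nullptr);
  return ptr;
}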
@@ -9,11 +9,13 @@ NAMESPACE_BEGIN(Grid);
 #define AccSmall (3)
 #define Shared (4)
 #define SharedSmall (5)
+uint64_t total_cache;
 uint64_t total_shared;
 uint64_t total_device;
 uint64_t total_host;;
 void MemoryManager::PrintBytes(void)
 {
+  std::cout << " MemoryManager : "<<total_cache <<" cache       bytes "<<std::endl;
   std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl;
   std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
   std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl;

@@ -35,6 +37,8 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes)
   if ( ptr == (void *) NULL ) {
     ptr = (void *) acceleratorAllocDevice(bytes);
     total_device+=bytes;
+  } else {
+    // std::cout <<"AcceleratorAllocate: cache hit Device pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
   }
   return ptr;
 }

@@ -53,8 +57,10 @@ void *MemoryManager::SharedAllocate(size_t bytes)
   if ( ptr == (void *) NULL ) {
     ptr = (void *) acceleratorAllocShared(bytes);
     total_shared+=bytes;
-    // std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
+    // std::cout <<"SharedAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
     // PrintBytes();
+  } else {
+    // std::cout <<"SharedAllocate: cache hit Shared pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
   }
   return ptr;
 }

@@ -74,6 +80,9 @@ void *MemoryManager::CpuAllocate(size_t bytes)
   if ( ptr == (void *) NULL ) {
     ptr = (void *) acceleratorAllocShared(bytes);
     total_host+=bytes;
+    // std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl;
+  } else {
+    // std::cout <<"CpufAllocate: cache hit Cpu pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
   }
   return ptr;
 }
@@ -120,7 +129,7 @@ void MemoryManager::Init(void)
   str= getenv("GRID_ALLOC_NCACHE_LARGE");
   if ( str ) {
     Nc = atoi(str);
-    if ( (Nc>=0) && (Nc < NallocCacheMax)) {
+    if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
       Ncache[Cpu]=Nc;
       Ncache[Acc]=Nc;
       Ncache[Shared]=Nc;

@@ -130,7 +139,7 @@ void MemoryManager::Init(void)
   str= getenv("GRID_ALLOC_NCACHE_SMALL");
   if ( str ) {
     Nc = atoi(str);
-    if ( (Nc>=0) && (Nc < NallocCacheMax)) {
+    if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
       Ncache[CpuSmall]=Nc;
       Ncache[AccSmall]=Nc;
       Ncache[SharedSmall]=Nc;
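Both hunks fix the same off-by-one when accepting the GRID_ALLOC_NCACHE_* environment variables: with '<' the maximum legal cache depth was rejected, with '<=' it is accepted. A self-contained sketch of the clamping logic (NallocCacheMax here is an illustrative value, not Grid's):

#include <cstdlib>

static const int NallocCacheMax = 8;   // illustrative bound

// Read a cache depth from the environment, keeping the fallback unless the
// value lies in [0, NallocCacheMax] inclusive (the '<=' from the fix above).
int cacheDepthFromEnv(const char *name, int fallback) {
  const char *str = std::getenv(name);
  if (!str) return fallback;
  int Nc = std::atoi(str);
  if (Nc >= 0 && Nc <= NallocCacheMax) return Nc;
  return fallback;
}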
@@ -211,6 +220,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries

   if ( entries[v].valid ) {
     ret = entries[v].address;
+    total_cache-=entries[v].bytes;
     entries[v].valid = 0;
     entries[v].address = NULL;
     entries[v].bytes = 0;

@@ -219,6 +229,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
   entries[v].address=ptr;
   entries[v].bytes  =bytes;
   entries[v].valid  =1;
+  total_cache+=entries[v].bytes;

   return ret;
 }

@@ -243,6 +254,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach
   for(int e=0;e<ncache;e++){
     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
       entries[e].valid = 0;
+      total_cache-=bytes;
       return entries[e].address;
     }
   }

@@ -93,8 +93,8 @@ private:
   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;

-  static void PrintBytes(void);
 public:
+  static void PrintBytes(void);
   static void Init(void);
   static void InitMessage(void);
   static void *AcceleratorAllocate(size_t bytes);
@@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/lattice/Lattice_local.h>
 #include <Grid/lattice/Lattice_reduction.h>
 #include <Grid/lattice/Lattice_peekpoke.h>
-//#include <Grid/lattice/Lattice_reality.h>
+#include <Grid/lattice/Lattice_reality.h>
 #include <Grid/lattice/Lattice_real_imag.h>
 #include <Grid/lattice/Lattice_comparison_utils.h>
 #include <Grid/lattice/Lattice_comparison.h>
@@ -52,6 +52,7 @@ public:
 // This will be safe to call from accelerator_for and is trivially copy constructible
 // The copy constructor for this will need to be used by device lambda functions
 /////////////////////////////////////////////////////////////////////////////////////////
+#undef LATTICE_BOUNDS_CHECK
 template<class vobj>
 class LatticeView : public LatticeAccelerator<vobj>
 {

@@ -61,14 +62,36 @@ public:
   void * cpu_ptr;
 #ifdef GRID_SIMT
   accelerator_inline const typename vobj::scalar_object operator()(size_t i) const {
+#ifdef LATTICE_BOUNDS_CHECK
+    assert(i<this->_odata_size);
+    assert(i>=0);
+#endif
     return coalescedRead(this->_odata[i]);
   }
 #else
-  accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; }
+  accelerator_inline const vobj & operator()(size_t i) const {
+#ifdef LATTICE_BOUNDS_CHECK
+    assert(i<this->_odata_size);
+    assert(i>=0);
+#endif
+    return this->_odata[i];
+  }
 #endif

-  accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
-  accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; };
+  accelerator_inline const vobj & operator[](size_t i) const {
+#ifdef LATTICE_BOUNDS_CHECK
+    assert(i<this->_odata_size);
+    assert(i>=0);
+#endif
+    return this->_odata[i];
+  };
+  accelerator_inline vobj & operator[](size_t i) {
+#ifdef LATTICE_BOUNDS_CHECK
+    assert(i<this->_odata_size);
+    assert(i>=0);
+#endif
+    return this->_odata[i];
+  };

   accelerator_inline uint64_t begin(void) const { return 0;};
   accelerator_inline uint64_t end(void)   const { return this->_odata_size; };
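LATTICE_BOUNDS_CHECK is #undef'd just above the class, so the asserts compile away by default; switching that one line to #define turns every view access into a bounds-checked access. (Since i is a size_t, the i>=0 assert is trivially true; only the upper-bound check does work.) A self-contained sketch of the same opt-in pattern, with illustrative names:

#include <cassert>
#include <cstddef>

// #define BOUNDS_CHECK   // flip on for debugging; off by default
template <class T>
struct View {
  T      *data;
  size_t  size;
  const T &operator[](size_t i) const {
#ifdef BOUNDS_CHECK
    assert(i < size);   // index must lie within the view
#endif
    return data[i];
  }
};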
@ -77,9 +77,16 @@ const int SpinorIndex = 2;
|
|||||||
template<typename T> struct isSpinor {
|
template<typename T> struct isSpinor {
|
||||||
static constexpr bool value = (SpinorIndex==T::TensorLevel);
|
static constexpr bool value = (SpinorIndex==T::TensorLevel);
|
||||||
};
|
};
|
||||||
|
const int CoarseIndex = 4;
|
||||||
|
template<typename T> struct isCoarsened {
|
||||||
|
static constexpr bool value = (CoarseIndex<=T::TensorLevel);
|
||||||
|
};
|
||||||
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
||||||
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
||||||
|
|
||||||
|
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
|
||||||
|
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
|
||||||
|
|
||||||
// ChrisK very keen to add extra space for Gparity doubling.
|
// ChrisK very keen to add extra space for Gparity doubling.
|
||||||
//
|
//
|
||||||
// Also add domain wall index, in a way where Wilson operator
|
// Also add domain wall index, in a way where Wilson operator
|
||||||
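The new isCoarsened / IfCoarsened traits mirror the existing spinor traits: any tensor whose TensorLevel is at or above CoarseIndex is treated as a coarse object, and overloads can be enabled or disabled on that basis. A minimal stand-alone sketch of the dispatch, with simplified types and names that are ours rather than Grid's:

#include <type_traits>
#include <iostream>

struct SpinorVec { static constexpr int TensorLevel = 2; };   // stands in for a fine 4-spinor
struct CoarseVec { static constexpr int TensorLevel = 4; };   // stands in for a coarse basis vector

constexpr int kSpinorIndex = 2;
constexpr int kCoarseIndex = 4;

template<typename T> struct isSpinorToy    { static constexpr bool value = (kSpinorIndex == T::TensorLevel); };
template<typename T> struct isCoarsenedToy { static constexpr bool value = (kCoarseIndex <= T::TensorLevel); };

// Only one of these overloads is enabled for a given argument type.
template<typename T, typename std::enable_if<isSpinorToy<T>::value,int>::type = 0>
void proj(const T &) { std::cout << "fine 4-spinor overload\n"; }

template<typename T, typename std::enable_if<isCoarsenedToy<T>::value,int>::type = 0>
void proj(const T &) { std::cout << "coarse-vector overload\n"; }

int main() { proj(SpinorVec{}); proj(CoarseVec{}); return 0; }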
@ -89,7 +89,8 @@ public:
  virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
  virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
  virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
  virtual std::vector<int> Directions(void) =0;
  virtual std::vector<int> Displacements(void)=0;

  virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
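Making Directions() and Displacements() part of the abstract interface lets generic code, for example multigrid setup that builds a coarse stencil, query an operator's hopping geometry without knowing the concrete action. A hypothetical consumer (ours, not part of the patch) that works with anything exposing the two accessors:

#include <iostream>
#include <vector>

// Hypothetical helper: accepts any operator type providing the two new accessors.
template<class Op>
void printHoppingGeometry(Op &op) {
  std::vector<int> dirs  = op.Directions();
  std::vector<int> disps = op.Displacements();
  for (size_t p = 0; p < dirs.size(); ++p) {
    std::cout << "stencil point " << p
              << ": direction " << dirs[p]
              << ", displacement " << disps[p] << std::endl;
  }
}

struct MockOp {   // stand-in with the same accessor signatures; values are illustrative
  std::vector<int> Directions(void)    { return {0,1,2,3,0,1,2,3,0}; }
  std::vector<int> Displacements(void) { return {1,1,1,1,-1,-1,-1,-1,0}; }
};

int main() { MockOp op; printHoppingGeometry(op); return 0; }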
@ -44,6 +44,9 @@ public:
  INHERIT_IMPL_TYPES(Impl);
  typedef StaggeredKernels<Impl> Kernels;

  virtual std::vector<int> Directions(void) { return this->directions; };
  virtual std::vector<int> Displacements(void){ return this->displacements;};

  FermionField _tmp;
  FermionField &tmp(void) { return _tmp; }

@ -49,6 +49,9 @@ public:
  INHERIT_IMPL_TYPES(Impl);
  typedef StaggeredKernels<Impl> Kernels;

  virtual std::vector<int> Directions(void) { return this->directions; };
  virtual std::vector<int> Displacements(void){ return this->displacements;};

  FermionField _tmp;
  FermionField &tmp(void) { return _tmp; }

@ -47,6 +47,9 @@ public:
  FermionField _tmp;
  FermionField &tmp(void) { return _tmp; }

  virtual std::vector<int> Directions(void) { return this->directions; };
  virtual std::vector<int> Displacements(void){ return this->displacements;};

  ////////////////////////////////////////
  // Performance monitoring
  ////////////////////////////////////////

@ -63,6 +63,9 @@ public:
  INHERIT_IMPL_TYPES(Impl);
  typedef WilsonKernels<Impl> Kernels;

  virtual std::vector<int> Directions(void) { return this->directions; };
  virtual std::vector<int> Displacements(void){ return this->displacements;};

  ///////////////////////////////////////////////////////////////
  // Implement the abstract base
  ///////////////////////////////////////////////////////////////

@ -72,6 +72,9 @@ public:
  typedef WilsonKernels<Impl> Kernels;
  PmuStat stat;

  virtual std::vector<int> Directions(void) { return this->directions; };
  virtual std::vector<int> Displacements(void){ return this->displacements;};

  FermionField _tmp;
  FermionField &tmp(void) { return _tmp; }
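Each concrete action satisfies the new pure virtuals by returning this->directions and this->displacements, which, as far as we can tell, are the static per-stencil-point tables the actions already carry (one direction index and one forward/backward hop per point, held by the Wilson and staggered static helper classes). Purely illustrative values of the kind of table the accessors return:

#include <vector>

// Illustration only: a 4-d nearest-neighbour table, eight hops, mu = 0..3
// forward then backward. The literals are ours; the real tables belong to the actions.
struct ExampleStencilTables {
  std::vector<int> directions   {0, 1, 2, 3, 0, 1, 2, 3};
  std::vector<int> displacements{1, 1, 1, 1, -1, -1, -1, -1};
  std::vector<int> Directions(void)    { return directions;    }
  std::vector<int> Displacements(void) { return displacements; }
};

int main() { ExampleStencilTables t; return t.Directions().size() == 8 ? 0 : 1; }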
@ -79,6 +79,8 @@ public:
    _Mat.M(in,tmp);
    G5R5(out,tmp);
  }
  virtual std::vector<int> Directions(void) { return _Mat.Directions();};
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
};

@ -127,6 +129,8 @@ public:
    _Mat.M(in,tmp);
    out=g5*tmp;
  }
  virtual std::vector<int> Directions(void) { return _Mat.Directions();};
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
};

NAMESPACE_END(Grid);
@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
|||||||
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
||||||
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
||||||
|
|
||||||
pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
|
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
|
||||||
pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
|
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
|
||||||
|
|
||||||
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
||||||
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
||||||
|
|
||||||
pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
|
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
|
||||||
pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
|
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
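The ImportGauge hunk now passes adj(CloverTerm) to pickCheckerboard directly instead of first forcing it through closure(). In Grid, adj() on a lattice field builds a lazy expression and closure() evaluates it into a concrete field, so the new spelling presumably relies on the callee (or an overload of it) being able to take the unevaluated expression. A toy stand-alone sketch of that distinction, with our own types rather than Grid's:

#include <cstddef>
#include <vector>

// Toy expression-template sketch: adj(x) is lazy, closure() materialises it.
struct Vec { std::vector<double> d; };
struct AdjExpr { const Vec &v; };                       // lazy "adjoint" node
inline AdjExpr adj(const Vec &v) { return AdjExpr{v}; }
inline Vec closure(const AdjExpr &e) {                  // force evaluation (toy: copy)
  Vec r; r.d = e.v.d;
  return r;
}

// A consumer that accepts a concrete field ...
void pick(Vec &out, const Vec &in) { out = in; }
// ... or, in the style of the new code, the unevaluated expression directly.
void pick(Vec &out, const AdjExpr &in) { out = closure(in); }

int main() {
  Vec a{{1,2,3}}, even;
  pick(even, closure(adj(a)));   // old spelling: evaluate first
  pick(even, adj(a));            // new spelling: let the callee evaluate
  return 0;
}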
@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  hspin(0)=fspin(0)-fspin(2);
  hspin(1)=fspin(1)-fspin(3);
}
@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 * 0 0 -1 0
 * 0 0 0 -1
 */

template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  hspin(0)=fspin(0);
  hspin(1)=fspin(1);
}

template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  hspin(0)=fspin(2);
  hspin(1)=fspin(3);
}

// template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  rfspin(0)=fspin(0);
  rfspin(1)=fspin(1);
  rfspin(2)=Zero();
  rfspin(3)=Zero();
}
// template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  rfspin(0)=Zero();
  rfspin(1)=Zero();
  rfspin(2)=fspin(2);
  rfspin(3)=fspin(3);
}

template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
{
  const int hN = N>>1;
  for(int s=0;s<hN;s++){
    rfspin(s)=fspin(s);
    rfspin(s+hN)=Zero();
  }
}
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
{
  const int hN = N>>1;
  for(int s=0;s<hN;s++){
    rfspin(s)=Zero();
    rfspin(s+hN)=fspin(s+hN);
  }
}
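The new IfCoarsened overloads treat the first N/2 components of a coarse vector as the "plus" block and the last N/2 as the "minus" block. A stand-alone sketch of the same split using plain arrays instead of Grid's iVector (Zero() becomes 0.0); the names and values are ours:

#include <array>
#include <cstdio>

// Toy versions of the coarse 5d projectors added above; N counts the coarse
// basis vectors twice (plus block then minus block).
template<int N>
void spProj5p_coarse(std::array<double,N> &r, const std::array<double,N> &f) {
  const int hN = N >> 1;
  for (int s = 0; s < hN; s++) { r[s] = f[s]; r[s+hN] = 0.0; }
}
template<int N>
void spProj5m_coarse(std::array<double,N> &r, const std::array<double,N> &f) {
  const int hN = N >> 1;
  for (int s = 0; s < hN; s++) { r[s] = 0.0; r[s+hN] = f[s+hN]; }
}

int main() {
  std::array<double,4> f{1,2,3,4}, p{}, m{};
  spProj5p_coarse<4>(p, f);   // p = {1,2,0,0}
  spProj5m_coarse<4>(m, f);   // m = {0,0,3,4}
  std::printf("%g %g %g %g\n", p[0], p[1], m[2], m[3]);
  return 0;
}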
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reconstruction routines to move back again to four spin
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 */
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)=timesMinusI(hspin(1));
@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)=timesI(hspin(1));
@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)-=timesI(hspin(1));
@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)+=timesI(hspin(1));
@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a

template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)= hspin(1);
@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)=-hspin(1);
@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)+=hspin(1);
@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)-=hspin(1);
@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 */
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)=timesMinusI(hspin(0));
@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)= timesI(hspin(0));
@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)-=timesI(hspin(0));
@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)+=timesI(hspin(0));
@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 */
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)=hspin(0);
@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0);
  fspin(1)=hspin(1);
  fspin(2)=-hspin(0);
@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)+=hspin(0);
@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  fspin(2)-=hspin(0);
@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 */
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
  fspin(1)=hspin(1)+hspin(1); // probably no measurable difference though
  fspin(2)=Zero();
@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)=Zero();
  fspin(1)=Zero();
  fspin(2)=hspin(0)+hspin(0);
@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0)+hspin(0);
  fspin(1)+=hspin(1)+hspin(1);
}
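For the 5-direction the projector simply copies the upper (or lower) two spin components, so the reconstructors above restore them doubled ("add is lower latency than mul"); the pair therefore applies 1+gamma5, twice the normalised projector, in a chiral basis where gamma5 = diag(1,1,-1,-1), with any factor of 1/2 assumed to be absorbed elsewhere by the caller. A quick stand-alone check with plain doubles (our own toy arrays, not Grid types):

#include <array>
#include <cstdio>

int main() {
  std::array<double,4> f{0.5, -1.25, 2.0, 3.5};
  // spProj5p: keep the upper two components of the 4-spinor
  std::array<double,2> h{f[0], f[1]};
  // spRecon5p: write them back doubled, zero the lower block
  std::array<double,4> r{h[0]+h[0], h[1]+h[1], 0.0, 0.0};
  std::printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);   // prints 1 -2.5 0 0
  return 0;
}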
@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
//////////
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjXp(hspin._internal[i],fspin._internal[i]);
  }
@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM
  }}
}


////////
// Xm
////////
template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjXm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri

template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconXm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconXm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
  }}
}


////////
// Yp
////////
template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjYp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri

template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconYp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconYp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
  }}
}

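The hunks in this stretch only delete the commented-out enable_if scaffolding; the surviving pattern is worth spelling out. Each iScalar / iVector / iMatrix overload peels one tensor-nesting level and recurses, so the same spProj/spRecon name eventually reaches the IfSpinor overload at the spin index however the site tensor is nested. A toy stand-alone version of that peel-and-recurse idiom (our own types, not Grid's):

#include <array>
#include <cstdio>

struct Spinor { std::array<double,4> s; };               // innermost level: acts here
template<int N> struct Vec { std::array<Spinor,N> c; };  // e.g. a colour vector of spinors

// Innermost overload: the actual projection-like operation (toy body).
inline void projXp(Spinor &h, const Spinor &f) { h.s = { f.s[0], f.s[1], 0.0, 0.0 }; }

// Outer overload: peel one nesting level and recurse component by component.
template<int N>
inline void projXp(Vec<N> &h, const Vec<N> &f) {
  for (int i = 0; i < N; i++) projXp(h.c[i], f.c[i]);
}

int main() {
  Vec<3> f{}, h{};
  f.c[0].s = {1, 2, 3, 4};
  projXp(h, f);                                          // recurses colour -> spin
  std::printf("%g %g\n", h.c[0].s[0], h.c[0].s[1]);
  return 0;
}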
////////
// Ym
////////
template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjYm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
  }}
}

template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconYm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconYm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@ -638,66 +594,57 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjZp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
  }}
}

template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconZp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
  }}
}

template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconZp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
  }}
}

@ -706,62 +653,53 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM
|
|||||||
////////
|
////////
|
||||||
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
|
||||||
spProjZm(hspin._internal,fspin._internal);
|
spProjZm(hspin._internal,fspin._internal);
|
||||||
}
|
}
|
||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
|
||||||
for(int i=0;i<N;i++) {
|
for(int i=0;i<N;i++) {
|
||||||
spProjZm(hspin._internal[i],fspin._internal[i]);
|
spProjZm(hspin._internal[i],fspin._internal[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
for(int j=0;j<N;j++){
|
for(int j=0;j<N;j++){
|
||||||
spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
|
spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
|
||||||
spReconZm(hspin._internal,fspin._internal);
|
spReconZm(hspin._internal,fspin._internal);
|
||||||
}
|
}
|
||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
|
||||||
for(int i=0;i<N;i++) {
|
for(int i=0;i<N;i++) {
|
||||||
spReconZm(hspin._internal[i],fspin._internal[i]);
|
spReconZm(hspin._internal[i],fspin._internal[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
for(int j=0;j<N;j++){
|
for(int j=0;j<N;j++){
|
||||||
spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
|
spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
|
||||||
accumReconZm(hspin._internal,fspin._internal);
|
accumReconZm(hspin._internal,fspin._internal);
|
||||||
}
|
}
|
||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
|
||||||
for(int i=0;i<N;i++) {
|
for(int i=0;i<N;i++) {
|
||||||
accumReconZm(hspin._internal[i],fspin._internal[i]);
|
accumReconZm(hspin._internal[i],fspin._internal[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
for(int j=0;j<N;j++){
|
for(int j=0;j<N;j++){
|
||||||
accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
|
accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||||
@ -774,41 +712,35 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjTp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconTp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconTp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


////////
// Tm
////////
template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjTm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri

template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconTm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconTm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


////////
// 5p
////////
-template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProj5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProj5p(hspin._internal[i],fspin._internal[i]);
  }
}
-template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri

template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spRecon5p(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr

template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumRecon5p(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM
}

// four spinor projectors for chiral proj
-// template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
-template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
+template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProj5p(hspin._internal,fspin._internal);
}
-// template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
-template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
+template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProj5p(hspin._internal[i],fspin._internal[i]);
  }
}
-// template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
-template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
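The `IfNotSpinor<...> = 0` and `IfNotCoarsened<...> = 0` parameters that this change adds to the chiral-projection overloads are SFINAE guards: when the trait fails to substitute, that overload drops out of the candidate set, so the generic index-recursing templates cannot collide with the spinor-level or coarse-level specialisations. A minimal standalone sketch of the idiom, with invented trait and type names rather than Grid's actual definitions:

    #include <iostream>
    #include <type_traits>

    struct FourSpinor {};   // stands in for a spinor-indexed tensor
    struct CoarseSite  {};  // stands in for a coarse (nbasis-component) tensor

    // Guard alias, usable as "IfNotSpinor<T> = 0" in a template parameter list.
    template<class T>
    using IfNotSpinor = typename std::enable_if<!std::is_same<T,FourSpinor>::value,int>::type;

    // Generic overload: participates only when T is not the spinor type.
    template<class T, IfNotSpinor<T> = 0>
    void proj(const T &) { std::cout << "recurse into sub-indices\n"; }

    // Spinor overload: the terminal case that applies the actual projector.
    void proj(const FourSpinor &) { std::cout << "apply the spin projection\n"; }

    int main() {
      proj(CoarseSite{});   // only the generic overload is viable
      proj(FourSpinor{});   // guard removes the generic one; no ambiguity
    }

The additional `IfNotCoarsened` guard presumably plays the same role once coarse-vector overloads of spProj5p/spProj5m exist elsewhere in the tree.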
@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &
// 5m
////////

-template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  spProj5m(hspin._internal,fspin._internal);
}
-template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
+template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  for(int i=0;i<N;i++) {
    spProj5m(hspin._internal[i],fspin._internal[i]);
  }
}
-template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
@ -1021,40 +924,34 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri

template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spRecon5m(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}

template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumRecon5m(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM


// four spinor projectors for chiral proj
-// template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
-template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
+template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProj5m(hspin._internal,fspin._internal);
}
-// template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
-template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
+template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProj5m(hspin._internal[i],fspin._internal[i]);
  }
}
-// template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
-template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
@ -154,8 +154,8 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co
  accelerator_for(sss,nloop,vobj::Nsimd(),{
    uint64_t ss = sss*Ls;
    decltype(coalescedRead(y_v[ss+sp])) tmp;
    spProj5m(tmp,y_v(ss+sp));
    tmp = a*x_v(ss+s)+b*tmp;
    coalescedWrite(z_v[ss+s],tmp);
  });
}
@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
  z.Checkerboard() = x.Checkerboard();
  conformable(x,z);
  int Ls = grid->_rdimensions[0];
-  Gamma G5(Gamma::Algebra::Gamma5);
  autoView( x_v, x, AcceleratorRead);
  autoView( z_v, z, AcceleratorWrite);
  uint64_t nloop = grid->oSites()/Ls;
@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
    uint64_t ss = sss*Ls;
    for(int s=0;s<Ls;s++){
      int sp = Ls-1-s;
-      coalescedWrite(z_v[ss+sp],G5*x_v(ss+s));
+      auto tmp = x_v(ss+s);
+      decltype(tmp) tmp_p;
+      decltype(tmp) tmp_m;
+      spProj5p(tmp_p,tmp);
+      spProj5m(tmp_m,tmp);
+      // Use of spProj5m, 5p captures the coarse space too
+      coalescedWrite(z_v[ss+sp],tmp_p - tmp_m);
    }
  });
}
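The rewrite above relies on the standard chiral decomposition: with the four-spinor projectors normalised in the usual way,

    P_{\pm} = \tfrac{1}{2}\,(1 \pm \gamma_5), \qquad  P_{+} - P_{-} = \gamma_5,

so `tmp_p - tmp_m` reproduces the `G5*x_v(ss+s)` of the deleted line, while routing the operation through spProj5p/spProj5m lets the same kernel resolve against coarse-space overloads as well, which appears to be the point of the comment about capturing the coarse space.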
@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
  z.Checkerboard() = x.Checkerboard();
  conformable(x, z);

-  Gamma G5(Gamma::Algebra::Gamma5);
-  z = G5 * x;
+  autoView( x_v, x, AcceleratorRead);
+  autoView( z_v, z, AcceleratorWrite);
+  uint64_t nloop = grid->oSites();
+  accelerator_for(ss,nloop,vobj::Nsimd(),{
+    auto tmp = x_v(ss);
+    decltype(tmp) tmp_p;
+    decltype(tmp) tmp_m;
+    spProj5p(tmp_p,tmp);
+    spProj5m(tmp_m,tmp);
+    coalescedWrite(z_v[ss],tmp_p - tmp_m);
+  });
}

+/*
template<class CComplex, int nbasis>
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
{
@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
    }
  });
}
+*/

NAMESPACE_END(Grid);

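On a coarse (nbasis-component) vector there is no four-spinor gamma_5 to multiply by, so the projector overloads presumably act block-wise on the basis index instead; a common convention is that the first nbasis/2 basis vectors carry + chirality and the second half - chirality. A toy standalone illustration of that sign-flip structure (the names and the half/half split are assumptions for the sketch, not Grid's code):

    #include <array>
    #include <iostream>

    // Toy coarse site: nbasis real components, purely for illustration.
    template<int nbasis> using CoarseSite = std::array<double,nbasis>;

    // "P+" keeps the first half of the basis, "P-" keeps the second half.
    template<int nbasis> CoarseSite<nbasis> projPlus (const CoarseSite<nbasis> &in){
      CoarseSite<nbasis> out{}; for(int b=0;b<nbasis/2;b++)      out[b]=in[b]; return out;
    }
    template<int nbasis> CoarseSite<nbasis> projMinus(const CoarseSite<nbasis> &in){
      CoarseSite<nbasis> out{}; for(int b=nbasis/2;b<nbasis;b++) out[b]=in[b]; return out;
    }

    int main(){
      constexpr int nb = 4;
      CoarseSite<nb> x{1.0, 2.0, 3.0, 4.0};
      auto p = projPlus<nb>(x), m = projMinus<nb>(x);
      // P+ - P- flips the sign of the lower-chirality block: prints 1 2 -3 -4
      for(int b=0; b<nb; b++) std::cout << p[b]-m[b] << " ";
      std::cout << "\n";
    }

With that structure, the difference of the two projections acts as a block-diagonal +/-1, i.e. a coarse-space Gamma5, which is exactly what the rewritten G5C needs.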
@ -151,6 +151,9 @@ inline void *acceleratorAllocShared(size_t bytes)
    ptr = (void *) NULL;
    printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
  }
+  //  size_t free,total;
+  //  cudaMemGetInfo(&free,&total);
+  //  std::cout << "Malloc managed "<<bytes<<" "<<free<<"/"<<total<<std::endl;
  return ptr;
};
inline void *acceleratorAllocDevice(size_t bytes)
@ -161,6 +164,9 @@ inline void *acceleratorAllocDevice(size_t bytes)
    ptr = (void *) NULL;
    printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
  }
+  //  size_t free,total;
+  //  cudaMemGetInfo(&free,&total);
+  //  std::cout << "Malloc device "<<bytes<<" "<<free<<"/"<<total<<std::endl;
  return ptr;
};
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
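The lines added above leave device-memory reporting in place but commented out. For reference, a standalone sketch of that allocate-and-report pattern using only documented CUDA runtime calls (cudaMallocManaged, cudaMemGetInfo, cudaGetErrorString); it is an illustration, not Grid's accelerator layer, and it prints the size_t with %zu, the portable counterpart of the %d used in the existing printf:

    #include <cuda_runtime.h>
    #include <cstdio>

    // Allocate managed memory and report how much device memory remains.
    void *allocSharedWithReport(size_t bytes)
    {
      void *ptr = nullptr;
      cudaError_t err = cudaMallocManaged(&ptr, bytes);
      if (err != cudaSuccess) {
        std::printf("cudaMallocManaged failed for %zu : %s\n", bytes, cudaGetErrorString(err));
        return nullptr;
      }
      size_t free_bytes = 0, total_bytes = 0;
      if (cudaMemGetInfo(&free_bytes, &total_bytes) == cudaSuccess) {
        std::printf("Malloc managed %zu bytes, device memory %zu/%zu free\n",
                    bytes, free_bytes, total_bytes);
      }
      return ptr;
    }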
@ -191,7 +191,7 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
@ -216,15 +216,16 @@ int main (int argc, char ** argv)


    double bytes=3*vol*Nc*Nc*sizeof(Complex);
+    double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
    double flops=Nc*Nc*(6+8+8)*vol;
-    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
+    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<ncbytes/time<<"\t\t" << flops/time<<std::endl;
  }
}
#if 1
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
@ -258,10 +259,11 @@ int main (int argc, char ** argv)
      tmult = tmult /Nloop;

      double bytes=3*vol*Nc*Nc*sizeof(Complex);
+      double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
      double flops=Nc*Nc*(6+8+8)*vol;
      std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
      time = time * 1000; // convert to NS for GB/s
-      std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
+      std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" <<ncbytes/time<<"\t\t" << flops/time<<std::endl;
    }
  }
#endif
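A plausible reading of the two byte counts feeding the new report column (the diff itself does not spell it out): the original figure charges one read of x, one read of y and one write of z for the SU3xSU3 multiply, while the "GB/s (incl Cshift)" figure appears to add the read and write traffic of the intermediate shifted field,

    \mathrm{bytes}   = 3 \times V N_c^2 \times \mathrm{sizeof}(\mathrm{Complex}),
    \mathrm{ncbytes} = 5 \times V N_c^2 \times \mathrm{sizeof}(\mathrm{Complex}),

so the second column is the effective bandwidth if the Cshift is counted as extra memory motion rather than assumed to stay in cache.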
@ -29,7 +29,91 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/Grid.h>

using namespace Grid;
- ;
+void MomentumSpacePropagatorTest(RealD mass,RealD M5, LatticePropagator &prop)
+{
+  // what type LatticeComplex
+  GridBase *_grid = prop.Grid();
+
+  typedef LatticeFermion FermionField;
+  typedef LatticePropagator PropagatorField;
+  typedef typename FermionField::vector_type vector_type;
+  typedef typename FermionField::scalar_type ScalComplex;
+  typedef iSinglet<ScalComplex> Tcomplex;
+  typedef Lattice<iSinglet<vector_type> > LatComplex;
+
+  Gamma::Algebra Gmu [] = {
+    Gamma::Algebra::GammaX,
+    Gamma::Algebra::GammaY,
+    Gamma::Algebra::GammaZ,
+    Gamma::Algebra::GammaT
+  };
+
+  Coordinate latt_size = _grid->_fdimensions;
+
+  PropagatorField num (_grid); num = Zero();
+
+  LatComplex sk(_grid);  sk = Zero();
+  LatComplex sk2(_grid); sk2= Zero();
+  LatComplex W(_grid);   W= Zero();
+  LatComplex a(_grid);   a= Zero();
+  LatComplex one (_grid); one = ScalComplex(1.0,0.0);
+  LatComplex denom(_grid); denom= Zero();
+  LatComplex cosha(_grid);
+  LatComplex kmu(_grid);
+  LatComplex Wea(_grid);
+  LatComplex Wema(_grid);
+
+  ScalComplex ci(0.0,1.0);
+  SpinColourMatrixD identity = ComplexD(1.0);
+
+  for(int mu=0;mu<Nd;mu++) {
+
+    LatticeCoordinate(kmu,mu);
+
+    RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
+
+    kmu = TwoPiL * kmu;
+    // kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
+
+    sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
+    sk  = sk  + sin(kmu) *sin(kmu);
+
+    num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*identity);
+
+  }
+
+  W = one - M5 + sk2;
+
+  ////////////////////////////////////////////
+  // Cosh alpha -> alpha
+  ////////////////////////////////////////////
+  cosha = (one + W*W + sk) / (abs(W)*2.0);
+
+  // FIXME Need a Lattice acosh
+  {
+    autoView(cosha_v,cosha,CpuRead);
+    autoView(a_v,a,CpuWrite);
+    for(int idx=0;idx<_grid->lSites();idx++){
+      Coordinate lcoor(Nd);
+      Tcomplex cc;
+      // RealD sgn;
+      _grid->LocalIndexToLocalCoor(idx,lcoor);
+      peekLocalSite(cc,cosha_v,lcoor);
+      assert((double)real(cc)>=1.0);
+      assert(fabs((double)imag(cc))<=1.0e-15);
+      cc = ScalComplex(::acosh(real(cc)),0.0);
+      pokeLocalSite(cc,a_v,lcoor);
+    }}
+
+  Wea = ( exp( a) * abs(W) );
+  Wema= ( exp(-a) * abs(W) );
+
+  num  = num + ( one - Wema ) * mass * identity;
+  denom= ( Wea - one ) + mass*mass * (one - Wema);
+  prop = num/denom;
+}


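Read off the added routine, the momentum-space free propagator it constructs appears to be the standard 4d effective domain-wall form; with W, the decay factor a and the lattice momenta defined as in the code,

    W(k)    = 1 - M_5 + \sum_\mu 2\sin^2(k_\mu/2),
    \cosh a = \frac{1 + W^2 + \sum_\mu \sin^2 k_\mu}{2\,|W|},
    S(k)    = \frac{-\,i\sum_\mu \gamma_\mu \sin k_\mu + m\,(1-|W|e^{-a})}{(|W|e^{a}-1) + m^2\,(1-|W|e^{-a})},

which matches num, denom, Wea = |W| e^{a} and Wema = |W| e^{-a} above; the ratio formed later in main() with masses 0.0 and 0.3 then presumably probes the (1-m)^2 normalisation factor the surrounding comments refer to.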
int main (int argc, char ** argv)
{
@ -307,6 +391,17 @@ int main (int argc, char ** argv)
  RealD M5 =0.8;
  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5);

+  /////////////////// Test code for (1-m)^2 ///////////////
+  LatticePropagatorD prop1(&GRID);
+  LatticePropagatorD prop2(&GRID);
+  LatticeComplexD ratio(&GRID);
+  MomentumSpacePropagatorTest(0.0,M5,prop1);
+  MomentumSpacePropagatorTest(0.3,M5,prop2);
+  ratio=localNorm2(prop2);
+  ratio=ratio/localNorm2(prop1);
+  std::cout << ratio;
+  /////////////////// Test code for (1-m)^2 factor ///////////////
+
  // Momentum space prop
  std::cout << " Solving by FFT and Feynman rules" <<std::endl;
  bool fiveD = false; //calculate 4d free propagator
@ -565,8 +565,8 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << " Running Multigrid "<< std::endl;
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;

-  BiCGSTAB<CoarseVector> CoarseMgridBiCGSTAB(0.01,1000);
-  BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0,24);
+  BiCGSTAB<CoarseVector> CoarseMgridBiCGSTAB(0.01,1000,false);
+  BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0,24,false);
  ZeroGuesser<CoarseVector> CoarseZeroGuesser;
  ZeroGuesser<LatticeFermion> FineZeroGuesser;

@ -590,5 +590,5 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "Done "<< std::endl;
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
  Grid_finalize();

}
[File diff suppressed because it is too large]