mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Compare commits
26 Commits
feature/be
...
feature/hw
Author | SHA1 | Date | |
---|---|---|---|
3064c9a6e2 | |||
729882827c | |||
baa668d3ac | |||
3c82d16ed8 | |||
5c8c0c2d7c | |||
e5a100846c | |||
a74e2dc12e | |||
595f512a6e | |||
a6499b22ff | |||
b4e42a59c6 | |||
8c913e0edd | |||
fd3f93d8d3 | |||
e9543cdacd | |||
98f7b3d298 | |||
b7b164ea24 | |||
77124d99d5 | |||
e1327e7ea0 | |||
569f78c2cf | |||
488c79d5a1 | |||
dc6b0f20b2 | |||
c0badc3e16 | |||
58f6529b55 | |||
e3f056dfbb | |||
da0ffa7a79 | |||
fcc7640b9c | |||
0cbe2859e0 |
@ -34,12 +34,6 @@
|
||||
#define __SYCL__REDEFINE__
|
||||
#endif
|
||||
|
||||
/* HIP save and restore compile environment*/
|
||||
#ifdef GRID_HIP
|
||||
#pragma push
|
||||
#pragma push_macro("__HIP_DEVICE_COMPILE__")
|
||||
#endif
|
||||
#define EIGEN_NO_HIP
|
||||
|
||||
#include <Grid/Eigen/Dense>
|
||||
#include <Grid/Eigen/unsupported/CXX11/Tensor>
|
||||
@ -58,12 +52,6 @@
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
/*HIP restore*/
|
||||
#ifdef __HIP__REDEFINE__
|
||||
#pragma pop_macro("__HIP_DEVICE_COMPILE__")
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||
Lattice<dotp> fine_inner_msk(fine);
|
||||
|
||||
// Multiply could be fused with innerProduct
|
||||
// Single block sum kernel could do both masks.
|
||||
fine_inner = localInnerProduct(fineX,fineY);
|
||||
mult(fine_inner_msk, fine_inner,FineMask);
|
||||
blockSum(CoarseInner,fine_inner_msk);
|
||||
}
|
||||
|
||||
|
||||
class Geometry {
|
||||
public:
|
||||
int npoint;
|
||||
@ -80,8 +78,12 @@ public:
|
||||
}
|
||||
directions [2*_d]=0;
|
||||
displacements[2*_d]=0;
|
||||
}
|
||||
|
||||
std::cout <<GridLogMessage << "Geometry "<<std::endl;
|
||||
for(int p=0;p<npoint;p++){
|
||||
std::cout <<GridLogMessage << "point " <<p<<" dir "<<directions[p]<<" delta " <<displacements[p]<<std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
@ -102,8 +104,8 @@ public:
|
||||
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
|
||||
CoarseGrid(_CoarseGrid),
|
||||
FineGrid(_FineGrid),
|
||||
subspace(nbasis,_FineGrid),
|
||||
checkerboard(_checkerboard)
|
||||
checkerboard(_checkerboard),
|
||||
subspace(nbasis,_FineGrid)
|
||||
{
|
||||
};
|
||||
|
||||
@ -285,6 +287,8 @@ public:
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
||||
|
||||
virtual std::vector<int> Directions(void) { return geom.directions; };
|
||||
virtual std::vector<int> Displacements(void){ return geom.displacements; };
|
||||
void M (const CoarseVector &in, CoarseVector &out)
|
||||
{
|
||||
conformable(_grid,in.Grid());
|
||||
@ -308,6 +312,9 @@ public:
|
||||
|
||||
int osites=Grid()->oSites();
|
||||
|
||||
autoView(st,Stencil,AcceleratorRead);
|
||||
siteVector *CBp=Stencil.CommBuf();
|
||||
|
||||
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
||||
int ss = sss/nbasis;
|
||||
int b = sss%nbasis;
|
||||
@ -318,12 +325,12 @@ public:
|
||||
|
||||
for(int point=0;point<geom.npoint;point++){
|
||||
|
||||
SE=Stencil.GetEntry(ptype,point,ss);
|
||||
SE=st.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
|
||||
nbr = coalescedRead(CBp[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
@ -332,7 +339,7 @@ public:
|
||||
}
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res);
|
||||
});
|
||||
});
|
||||
|
||||
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
|
||||
};
|
||||
@ -409,38 +416,23 @@ public:
|
||||
MdirCalc(in,out[p],p);
|
||||
}
|
||||
};
|
||||
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
|
||||
|
||||
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp)
|
||||
{
|
||||
this->MdirComms(in);
|
||||
|
||||
int ndim = in.Grid()->Nd();
|
||||
|
||||
//////////////
|
||||
// 4D action like wilson
|
||||
// 0+ => 0
|
||||
// 0- => 1
|
||||
// 1+ => 2
|
||||
// 1- => 3
|
||||
// etc..
|
||||
//////////////
|
||||
// 5D action like DWF
|
||||
// 1+ => 0
|
||||
// 1- => 1
|
||||
// 2+ => 2
|
||||
// 2- => 3
|
||||
// etc..
|
||||
auto point = [dir, disp, ndim](){
|
||||
if(dir == 0 and disp == 0)
|
||||
return 8;
|
||||
else if ( ndim==4 ) {
|
||||
return (4 * dir + 1 - disp) / 2;
|
||||
} else {
|
||||
return (4 * (dir-1) + 1 - disp) / 2;
|
||||
}
|
||||
}();
|
||||
int point=-1;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
if( (dir==geom.directions[p])&&(disp==geom.displacements[p])) point=p;
|
||||
}
|
||||
assert(point!=-1);// Must find
|
||||
|
||||
std::cout <<GridLogMessage << "Mdir point "<<point<<" dir "<<dir<<" disp "<<disp <<std::endl;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
std::cout <<GridLogMessage << "point " <<p<<" dir "<<geom.directions[p]<<" delta " <<geom.displacements[p]<<std::endl;
|
||||
}
|
||||
MdirCalc(in,out,point);
|
||||
|
||||
};
|
||||
|
||||
void Mdiag(const CoarseVector &in, CoarseVector &out)
|
||||
@ -456,10 +448,58 @@ public:
|
||||
geom(CoarseGrid._ndimension),
|
||||
hermitian(hermitian_),
|
||||
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
|
||||
A(geom.npoint,&CoarseGrid)
|
||||
A(geom.npoint,&CoarseGrid)
|
||||
{
|
||||
};
|
||||
|
||||
void Test(Aggregation<Fobj,CComplex,nbasis> &_Aggregates,GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop)
|
||||
{
|
||||
typedef Lattice<Fobj> FineField;
|
||||
CoarseVector Cin(_grid);
|
||||
CoarseVector Cout(_grid);
|
||||
CoarseVector CFout(_grid);
|
||||
|
||||
FineField Fin(FineGrid);
|
||||
FineField Fout(FineGrid);
|
||||
|
||||
|
||||
std::vector<int> seeds({1,2,3,4,5});
|
||||
GridParallelRNG RNG(_grid); RNG.SeedFixedIntegers(seeds);
|
||||
gaussian(RNG,Cin);
|
||||
|
||||
_Aggregates.PromoteFromSubspace(Cin,Fin);
|
||||
_Aggregates.ProjectToSubspace(Cin,Fin);
|
||||
|
||||
std::cout << GridLogMessage<< "************ "<<std::endl;
|
||||
std::cout << GridLogMessage<< " Testing M "<<std::endl;
|
||||
std::cout << GridLogMessage<< "************ "<<std::endl;
|
||||
// Coarse operator
|
||||
this->M(Cin,Cout);
|
||||
// Fine projected operator
|
||||
_Aggregates.PromoteFromSubspace(Cin,Fin);
|
||||
linop.Op(Fin,Fout);
|
||||
_Aggregates.ProjectToSubspace(CFout,Fout);
|
||||
|
||||
CFout = CFout-Cout;
|
||||
RealD diff = norm2(CFout);
|
||||
std::cout << GridLogMessage<< " diff "<<diff<<std::endl;
|
||||
assert(diff<1.0e-5);
|
||||
|
||||
std::cout << GridLogMessage<< "************ "<<std::endl;
|
||||
std::cout << GridLogMessage<< " Testing Mdag "<<std::endl;
|
||||
std::cout << GridLogMessage<< "************ "<<std::endl;
|
||||
// Coarse operator
|
||||
Mdag(Cin,Cout);
|
||||
// Fine operator
|
||||
linop.AdjOp(Fin,Fout);
|
||||
_Aggregates.ProjectToSubspace(CFout,Fout);
|
||||
|
||||
CFout = CFout-Cout;
|
||||
diff = norm2(CFout);
|
||||
std::cout << GridLogMessage<< " diff "<<diff<<std::endl;
|
||||
assert(diff<1.0e-5);
|
||||
}
|
||||
|
||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> & Subspace)
|
||||
{
|
||||
@ -496,8 +536,19 @@ public:
|
||||
|
||||
CoarseScalar InnerProd(Grid());
|
||||
|
||||
std::cout << GridLogMessage<< "CoarsenMatrix Orthog " << std::endl;
|
||||
// Orthogonalise the subblocks over the basis
|
||||
blockOrthogonalise(InnerProd,Subspace.subspace);
|
||||
std::cout << GridLogMessage<< "CoarsenMatrix Orthog done " << std::endl;
|
||||
|
||||
auto OpDirections = linop.Directions();
|
||||
auto OpDisplacements = linop.Displacements();
|
||||
|
||||
std::cout<<" Coarsening an operator with "<< OpDirections.size()<<" terms "<<std::endl;
|
||||
for(int p=0;p<OpDirections.size();p++) {
|
||||
assert(OpDirections[p]==geom.directions[p]);
|
||||
assert(OpDisplacements[p]==geom.displacements[p]);
|
||||
}
|
||||
|
||||
// Compute the matrix elements of linop between this orthonormal
|
||||
// set of vectors.
|
||||
@ -533,13 +584,27 @@ public:
|
||||
evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
|
||||
oddmask = one-evenmask;
|
||||
|
||||
/*
|
||||
{
|
||||
phi=Subspace.subspace[0];
|
||||
linop.OpDirAll(phi,Mphi_p);
|
||||
for(int p=0;p<geom.npoint-1;p++){
|
||||
int dir=geom.directions[p];
|
||||
int disp=geom.displacements[p];
|
||||
linop.OpDir(phi,Mphi,dir,disp);
|
||||
Mphi=Mphi-Mphi_p[p];
|
||||
std::cout << GridLogMessage <<" Direction mapping check " <<norm2(Mphi)<<std::endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
assert(self_stencil!=-1);
|
||||
int lhermitian=hermitian;
|
||||
|
||||
for(int i=0;i<nbasis;i++){
|
||||
|
||||
phi=Subspace.subspace[i];
|
||||
|
||||
// std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
|
||||
std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
|
||||
linop.OpDirAll(phi,Mphi_p);
|
||||
linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
|
||||
|
||||
@ -550,7 +615,7 @@ public:
|
||||
int dir = geom.directions[p];
|
||||
int disp = geom.displacements[p];
|
||||
|
||||
if ( (disp==-1) || (!hermitian ) ) {
|
||||
if ( (disp==-1) || (!lhermitian ) ) {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Pick out contributions coming from this cell and neighbour cell
|
||||
@ -568,11 +633,23 @@ public:
|
||||
autoView( A_self , A[self_stencil], AcceleratorWrite);
|
||||
|
||||
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
||||
|
||||
if ( lhermitian && (disp==-1) ) {
|
||||
for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>*
|
||||
int dirp = geom.directions[pp];
|
||||
int dispp = geom.displacements[pp];
|
||||
if ( (dirp==dir) && (dispp==1) ){
|
||||
auto sft = conjugate(Cshift(oZProj,dir,1));
|
||||
autoView( sft_v , sft , AcceleratorWrite);
|
||||
autoView( A_pp , A[pp], AcceleratorWrite);
|
||||
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage<< "CoarsenMatrix Diag "<<std::endl;
|
||||
///////////////////////////////////////////
|
||||
// Faster alternate self coupling.. use hermiticity to save 2x
|
||||
///////////////////////////////////////////
|
||||
@ -604,31 +681,35 @@ public:
|
||||
|
||||
}
|
||||
}
|
||||
if(hermitian) {
|
||||
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
|
||||
ForceHermitian();
|
||||
|
||||
MemoryManager::PrintBytes();
|
||||
|
||||
// Auto self test
|
||||
Test( Subspace,FineGrid,linop);
|
||||
|
||||
#if 0
|
||||
///////////////////////////
|
||||
// test code worth preserving in if block
|
||||
///////////////////////////
|
||||
std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
|
||||
std::cout<<GridLogMessage<< "\n"<<A[p] << std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
|
||||
|
||||
phi=Subspace.subspace[0];
|
||||
std::vector<int> bc(FineGrid->_ndimension,0);
|
||||
blockPick(Grid(),phi,tmp,bc); // Pick out a block
|
||||
linop.Op(tmp,Mphi); // Apply big dop
|
||||
blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
|
||||
std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
|
||||
std::cout<<GridLogMessage<< iProj <<std::endl;
|
||||
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void ForceHermitian(void) {
|
||||
CoarseMatrix Diff (Grid());
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
int dir = geom.directions[p];
|
||||
int disp = geom.displacements[p];
|
||||
if(disp==-1) {
|
||||
// Find the opposite link
|
||||
for(int pp=0;pp<geom.npoint;pp++){
|
||||
int dirp = geom.directions[pp];
|
||||
int dispp = geom.displacements[pp];
|
||||
if ( (dirp==dir) && (dispp==1) ){
|
||||
// Diff = adj(Cshift(A[p],dir,1)) - A[pp];
|
||||
// std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
|
||||
A[pp] = adj(Cshift(A[p],dir,1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -52,6 +52,9 @@ public:
|
||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
|
||||
virtual void HermOp(const Field &in, Field &out)=0;
|
||||
|
||||
virtual std::vector<int> Directions(void) =0;
|
||||
virtual std::vector<int> Displacements(void)=0;
|
||||
};
|
||||
|
||||
|
||||
@ -76,6 +79,9 @@ class MdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
@ -111,6 +117,8 @@ class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
RealD _shift;
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
@ -151,6 +159,8 @@ template<class Matrix,class Field>
|
||||
class HermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
@ -182,6 +192,8 @@ template<class Matrix,class Field>
|
||||
class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
@ -255,6 +267,8 @@ template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
public:
|
||||
Matrix &_Mat;
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
@ -281,6 +295,8 @@ template<class Matrix,class Field>
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
@ -307,6 +323,8 @@ template<class Matrix,class Field>
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
@ -372,6 +390,8 @@ class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase
|
||||
{
|
||||
public:
|
||||
Matrix& _Mat;
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
@ -405,6 +425,8 @@ class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Fi
|
||||
Matrix &_Mat;
|
||||
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
@ -435,6 +457,8 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi
|
||||
Matrix& _Mat;
|
||||
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
@ -475,6 +499,8 @@ class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
public:
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
|
@ -48,6 +48,8 @@ public:
|
||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
|
||||
virtual std::vector<int> Directions(void) =0;
|
||||
virtual std::vector<int> Displacements(void)=0;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -73,6 +75,8 @@ public:
|
||||
virtual void MooeeDag (const Field &in, Field &out)=0;
|
||||
virtual void MooeeInvDag (const Field &in, Field &out)=0;
|
||||
|
||||
virtual std::vector<int> Directions(void) =0;
|
||||
virtual std::vector<int> Displacements(void)=0;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*************************************************************************************
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
/*
|
||||
* Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv.
|
||||
* Script A = SolverMatrix
|
||||
@ -50,53 +51,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
* Vout = x
|
||||
*/
|
||||
|
||||
// abstract base
|
||||
template<class Field, class CoarseField>
|
||||
|
||||
template<class Field, class CoarseField, class Aggregates>
|
||||
class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
int verbose;
|
||||
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
const int mmax = 5;
|
||||
GridBase *grid;
|
||||
GridBase *coarsegrid;
|
||||
const int mmax = 4;
|
||||
GridBase *FineGrid;
|
||||
GridBase *CoarseGrid;
|
||||
|
||||
LinearOperatorBase<Field> *_Linop
|
||||
OperatorFunction<Field> *_Smoother,
|
||||
LinearFunction<CoarseField> *_CoarseSolver;
|
||||
|
||||
// Need somthing that knows how to get from Coarse to fine and back again
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
LinearFunction<Field> &_Smoother;
|
||||
LinearFunction<CoarseField> &_CoarseSolver;
|
||||
Aggregates &_Aggregates;
|
||||
|
||||
// more most opertor functions
|
||||
TwoLevelFlexiblePcg(RealD tol,
|
||||
Integer maxit,
|
||||
LinearOperatorBase<Field> *Linop,
|
||||
LinearOperatorBase<Field> *SmootherLinop,
|
||||
OperatorFunction<Field> *Smoother,
|
||||
OperatorFunction<CoarseField> CoarseLinop
|
||||
) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
_Linop(Linop),
|
||||
_PreconditionerLinop(PrecLinop),
|
||||
_Preconditioner(Preconditioner)
|
||||
Integer maxit,
|
||||
LinearOperatorBase<Field> *Linop,
|
||||
LinearFunction<Field> *Smoother,
|
||||
LinearFunction<CoarseField> *CoarseSolver,
|
||||
Aggregates *AggP
|
||||
) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
_Linop(*Linop),
|
||||
_Smoother(*Smoother),
|
||||
_CoarseSolver(*CoarseSolver),
|
||||
_Aggregates(*AggP)
|
||||
{
|
||||
CoarseGrid=_Aggregates.CoarseGrid;
|
||||
FineGrid=_Aggregates.FineGrid;
|
||||
verbose=0;
|
||||
};
|
||||
|
||||
// The Pcg routine is common to all, but the various matrices differ from derived
|
||||
// implementation to derived implmentation
|
||||
void operator() (const Field &src, Field &psi){
|
||||
void operator() (const Field &src, Field &psi){
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
grid = src.Grid();
|
||||
|
||||
RealD f;
|
||||
RealD rtzp,rtz,a,d,b;
|
||||
RealD rptzp;
|
||||
RealD tn;
|
||||
// RealD rptzp;
|
||||
// RealD tn;
|
||||
RealD guess = norm2(psi);
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = ssq*Tolerance*Tolerance;
|
||||
@ -104,15 +106,15 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
/////////////////////////////
|
||||
// Set up history vectors
|
||||
/////////////////////////////
|
||||
std::vector<Field> p (mmax,grid);
|
||||
std::vector<Field> mmp(mmax,grid);
|
||||
std::vector<Field> p (mmax,FineGrid);
|
||||
std::vector<Field> mmp(mmax,FineGrid);
|
||||
std::vector<RealD> pAp(mmax);
|
||||
|
||||
Field x (grid); x = psi;
|
||||
Field z (grid);
|
||||
Field tmp(grid);
|
||||
Field r (grid);
|
||||
Field mu (grid);
|
||||
Field x (FineGrid); x = psi;
|
||||
Field z (FineGrid);
|
||||
Field tmp(FineGrid);
|
||||
Field r (FineGrid);
|
||||
Field mu (FineGrid);
|
||||
|
||||
//////////////////////////
|
||||
// x0 = Vstart -- possibly modify guess
|
||||
@ -121,13 +123,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
Vstart(x,src);
|
||||
|
||||
// r0 = b -A x0
|
||||
HermOp(x,mmp); // Shouldn't this be something else?
|
||||
_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
|
||||
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
|
||||
|
||||
//////////////////////////////////
|
||||
// Compute z = M1 x
|
||||
//////////////////////////////////
|
||||
M1(r,z,tmp,mp,SmootherMirs);
|
||||
M1(r,z);
|
||||
rtzp =real(innerProduct(r,z));
|
||||
|
||||
///////////////////////////////////////
|
||||
@ -143,7 +145,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
int peri_kp = (k+1) % mmax;
|
||||
|
||||
rtz=rtzp;
|
||||
d= M3(p[peri_k],mp,mmp[peri_k],tmp);
|
||||
d= M3(p[peri_k],mmp[peri_k]);
|
||||
a = rtz/d;
|
||||
|
||||
// Memorise this
|
||||
@ -153,13 +155,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
|
||||
|
||||
// Compute z = M x
|
||||
M1(r,z,tmp,mp);
|
||||
M1(r,z);
|
||||
|
||||
rtzp =real(innerProduct(r,z));
|
||||
|
||||
M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
|
||||
|
||||
p[peri_kp]=p[peri_k];
|
||||
p[peri_kp]=mu;
|
||||
|
||||
// Standard search direction p -> z + b p ; b =
|
||||
b = (rtzp)/rtz;
|
||||
@ -181,7 +183,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
// Stopping condition
|
||||
if ( rn <= rsq ) {
|
||||
|
||||
HermOp(x,mmp); // Shouldn't this be something else?
|
||||
_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
|
||||
axpy(tmp,-1.0,src,mmp[0]);
|
||||
|
||||
RealD psinorm = sqrt(norm2(x));
|
||||
@ -190,7 +192,8 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
RealD true_residual = tmpnorm/srcnorm;
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
|
||||
return k;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Non-convergence
|
||||
@ -199,48 +202,40 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
|
||||
public:
|
||||
|
||||
virtual void M(Field & in,Field & out,Field & tmp) {
|
||||
|
||||
}
|
||||
|
||||
virtual void M1(Field & in, Field & out) {// the smoother
|
||||
|
||||
virtual void M1(Field & in, Field & out)
|
||||
{// the smoother
|
||||
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
||||
Field tmp(grid);
|
||||
Field Min(grid);
|
||||
Field tmp(FineGrid);
|
||||
Field Min(FineGrid);
|
||||
|
||||
PcgM(in,Min); // Smoother call
|
||||
CoarseField PleftProj(CoarseGrid);
|
||||
CoarseField PleftMss_proj(CoarseGrid);
|
||||
|
||||
HermOp(Min,out);
|
||||
_Smoother(in,Min); // Smoother call
|
||||
|
||||
_Linop.HermOp(Min,out);
|
||||
axpy(tmp,-1.0,out,in); // tmp = in - A Min
|
||||
|
||||
ProjectToSubspace(tmp,PleftProj);
|
||||
ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
|
||||
PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
||||
_Aggregates.ProjectToSubspace(PleftProj,tmp);
|
||||
_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
|
||||
_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
||||
axpy(out,1.0,Min,tmp); // Min+tmp
|
||||
}
|
||||
|
||||
virtual void M2(const Field & in, Field & out) {
|
||||
virtual void M2(const Field & in, Field & out)
|
||||
{
|
||||
out=in;
|
||||
// Must override for Def2 only
|
||||
// case PcgDef2:
|
||||
// Pright(in,out);
|
||||
// break;
|
||||
}
|
||||
|
||||
virtual RealD M3(const Field & p, Field & mmp){
|
||||
virtual RealD M3(const Field & p, Field & mmp)
|
||||
{
|
||||
double d,dd;
|
||||
HermOpAndNorm(p,mmp,d,dd);
|
||||
_Linop.HermOpAndNorm(p,mmp,d,dd);
|
||||
return dd;
|
||||
// Must override for Def1 only
|
||||
// case PcgDef1:
|
||||
// d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
|
||||
// linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
|
||||
// Pleft(mp,mmp);
|
||||
// d=real(linop_d->inner(p,mmp));
|
||||
}
|
||||
|
||||
virtual void VstartDef2(Field & xconst Field & src){
|
||||
virtual void Vstart(Field & x,const Field & src)
|
||||
{
|
||||
//case PcgDef2:
|
||||
//case PcgAdef2:
|
||||
//case PcgAdef2f:
|
||||
@ -256,142 +251,79 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
// = src_s - (A guess)_s - src_s + (A guess)_s
|
||||
// = 0
|
||||
///////////////////////////////////
|
||||
Field r(grid);
|
||||
Field mmp(grid);
|
||||
Field r(FineGrid);
|
||||
Field mmp(FineGrid);
|
||||
|
||||
CoarseField PleftProj(CoarseGrid);
|
||||
CoarseField PleftMss_proj(CoarseGrid);
|
||||
|
||||
HermOp(x,mmp);
|
||||
_Linop.HermOp(x,mmp);
|
||||
axpy (r, -1.0, mmp, src); // r_{-1} = src - A x
|
||||
ProjectToSubspace(r,PleftProj);
|
||||
ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||
PromoteFromSubspace(PleftMss_proj,mmp);
|
||||
_Aggregates.ProjectToSubspace(PleftProj,r);
|
||||
_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||
_Aggregates.PromoteFromSubspace(PleftMss_proj,mmp);
|
||||
x=x+mmp;
|
||||
|
||||
}
|
||||
|
||||
virtual void Vstart(Field & x,const Field & src){
|
||||
return;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Only Def1 has non-trivial Vout. Override in Def1
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
virtual void Vout (Field & in, Field & out,Field & src){
|
||||
out = in;
|
||||
//case PcgDef1:
|
||||
// //Qb + PT x
|
||||
// ProjectToSubspace(src,PleftProj);
|
||||
// ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||
// PromoteFromSubspace(PleftMss_proj,tmp);
|
||||
//
|
||||
// Pright(in,out);
|
||||
//
|
||||
// linop_d->axpy(out,tmp,out,1.0);
|
||||
// break;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Pright and Pleft are common to all implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
virtual void Pright(Field & in,Field & out){
|
||||
virtual void Pright(Field & in,Field & out)
|
||||
{
|
||||
// P_R = [ 1 0 ]
|
||||
// [ -Mss^-1 Msb 0 ]
|
||||
Field in_sbar(grid);
|
||||
Field in_sbar(FineGrid);
|
||||
|
||||
ProjectToSubspace(in,PleftProj);
|
||||
PromoteFromSubspace(PleftProj,out);
|
||||
CoarseField PleftProj(CoarseGrid);
|
||||
CoarseField PleftMss_proj(CoarseGrid);
|
||||
|
||||
_Aggregates.ProjectToSubspace(PleftProj,in);
|
||||
_Aggregates.PromoteFromSubspace(PleftProj,out);
|
||||
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
|
||||
|
||||
HermOp(in_sbar,out);
|
||||
ProjectToSubspace(out,PleftProj); // Mssbar in_sbar (project)
|
||||
_Linop.HermOp(in_sbar,out);
|
||||
_Aggregates.ProjectToSubspace(PleftProj,out); // Mssbar in_sbar (project)
|
||||
|
||||
ApplyInverse (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
|
||||
PromoteFromSubspace(PleftMss_proj,out); //
|
||||
_CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
|
||||
_Aggregates.PromoteFromSubspace(PleftMss_proj,out); //
|
||||
|
||||
axpy(out,-1.0,out,in_sbar); // in_sbar - Mss^{-1} Mssbar in_sbar
|
||||
}
|
||||
virtual void Pleft (Field & in,Field & out){
|
||||
virtual void Pleft (Field & in,Field & out)
|
||||
{
|
||||
// P_L = [ 1 -Mbs Mss^-1]
|
||||
// [ 0 0 ]
|
||||
Field in_sbar(grid);
|
||||
Field tmp2(grid);
|
||||
Field Mtmp(grid);
|
||||
Field in_sbar(FineGrid);
|
||||
Field tmp2(FineGrid);
|
||||
Field Mtmp(FineGrid);
|
||||
|
||||
ProjectToSubspace(in,PleftProj);
|
||||
PromoteFromSubspace(PleftProj,out);
|
||||
CoarseField PleftProj(CoarseGrid);
|
||||
CoarseField PleftMss_proj(CoarseGrid);
|
||||
|
||||
_Aggregates.ProjectToSubspace(PleftProj,in);
|
||||
_Aggregates.PromoteFromSubspace(PleftProj,out);
|
||||
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
|
||||
|
||||
ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
|
||||
PromoteFromSubspace(PleftMss_proj,out);
|
||||
_CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} in_s
|
||||
_Aggregates.PromoteFromSubspace(PleftMss_proj,out);
|
||||
|
||||
HermOp(out,Mtmp);
|
||||
_Linop.HermOp(out,Mtmp);
|
||||
|
||||
ProjectToSubspace(Mtmp,PleftProj); // Msbar s Mss^{-1}
|
||||
PromoteFromSubspace(PleftProj,tmp2);
|
||||
_Aggregates.ProjectToSubspace(PleftProj,Mtmp); // Msbar s Mss^{-1}
|
||||
_Aggregates.PromoteFromSubspace(PleftProj,tmp2);
|
||||
|
||||
axpy(out,-1.0,tmp2,Mtmp);
|
||||
axpy(out,-1.0,out,in_sbar); // in_sbar - Msbars Mss^{-1} in_s
|
||||
}
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp){
|
||||
|
||||
}
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
|
||||
|
||||
}
|
||||
virtual void M2(Field & in, Field & out){
|
||||
|
||||
}
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
|
||||
|
||||
}
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
|
||||
|
||||
}
|
||||
}
|
||||
/*
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
@ -60,6 +60,8 @@ public:
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
RealD t=-usecond();
|
||||
|
||||
guess = Zero();
|
||||
assert(evec.size()==eval.size());
|
||||
auto N = evec.size();
|
||||
@ -68,6 +70,8 @@ public:
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.Checkerboard() = src.Checkerboard();
|
||||
t+=usecond();
|
||||
std::cout<<GridLogMessage<<"\t\t\t" << "Deflated guess took "<< t/1000.0<< "ms" <<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -59,7 +59,7 @@ public:
|
||||
GridBase *grid = src.Grid();
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);
|
||||
|
||||
psi=zero;
|
||||
psi=Zero();
|
||||
r = src;
|
||||
Preconditioner(r,p);
|
||||
|
||||
|
@ -53,7 +53,11 @@ public:
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
profilerAllocate(bytes);
|
||||
#ifdef GRID_UVM
|
||||
_Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
|
||||
#else
|
||||
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
||||
#endif
|
||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||
return ptr;
|
||||
}
|
||||
@ -62,7 +66,11 @@ public:
|
||||
{
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
profilerFree(bytes);
|
||||
#ifdef GRID_UVM
|
||||
MemoryManager::SharedFree((void *)__p,bytes);
|
||||
#else
|
||||
MemoryManager::CpuFree((void *)__p,bytes);
|
||||
#endif
|
||||
}
|
||||
|
||||
// FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
|
||||
|
@ -9,11 +9,13 @@ NAMESPACE_BEGIN(Grid);
|
||||
#define AccSmall (3)
|
||||
#define Shared (4)
|
||||
#define SharedSmall (5)
|
||||
uint64_t total_cache;
|
||||
uint64_t total_shared;
|
||||
uint64_t total_device;
|
||||
uint64_t total_host;;
|
||||
void MemoryManager::PrintBytes(void)
|
||||
{
|
||||
std::cout << " MemoryManager : "<<total_cache <<" cache bytes "<<std::endl;
|
||||
std::cout << " MemoryManager : "<<total_shared<<" shared bytes "<<std::endl;
|
||||
std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
|
||||
std::cout << " MemoryManager : "<<total_host <<" cpu bytes "<<std::endl;
|
||||
@ -35,6 +37,8 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes)
|
||||
if ( ptr == (void *) NULL ) {
|
||||
ptr = (void *) acceleratorAllocDevice(bytes);
|
||||
total_device+=bytes;
|
||||
} else {
|
||||
// std::cout <<"AcceleratorAllocate: cache hit Device pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
@ -53,8 +57,10 @@ void *MemoryManager::SharedAllocate(size_t bytes)
|
||||
if ( ptr == (void *) NULL ) {
|
||||
ptr = (void *) acceleratorAllocShared(bytes);
|
||||
total_shared+=bytes;
|
||||
// std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
|
||||
// std::cout <<"SharedAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
|
||||
// PrintBytes();
|
||||
} else {
|
||||
// std::cout <<"SharedAllocate: cache hit Shared pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
@ -74,6 +80,9 @@ void *MemoryManager::CpuAllocate(size_t bytes)
|
||||
if ( ptr == (void *) NULL ) {
|
||||
ptr = (void *) acceleratorAllocShared(bytes);
|
||||
total_host+=bytes;
|
||||
// std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl;
|
||||
} else {
|
||||
// std::cout <<"CpufAllocate: cache hit Cpu pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
@ -120,7 +129,7 @@ void MemoryManager::Init(void)
|
||||
str= getenv("GRID_ALLOC_NCACHE_LARGE");
|
||||
if ( str ) {
|
||||
Nc = atoi(str);
|
||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||
if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
|
||||
Ncache[Cpu]=Nc;
|
||||
Ncache[Acc]=Nc;
|
||||
Ncache[Shared]=Nc;
|
||||
@ -130,7 +139,7 @@ void MemoryManager::Init(void)
|
||||
str= getenv("GRID_ALLOC_NCACHE_SMALL");
|
||||
if ( str ) {
|
||||
Nc = atoi(str);
|
||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||
if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
|
||||
Ncache[CpuSmall]=Nc;
|
||||
Ncache[AccSmall]=Nc;
|
||||
Ncache[SharedSmall]=Nc;
|
||||
@ -211,6 +220,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
|
||||
|
||||
if ( entries[v].valid ) {
|
||||
ret = entries[v].address;
|
||||
total_cache-=entries[v].bytes;
|
||||
entries[v].valid = 0;
|
||||
entries[v].address = NULL;
|
||||
entries[v].bytes = 0;
|
||||
@ -219,6 +229,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
|
||||
entries[v].address=ptr;
|
||||
entries[v].bytes =bytes;
|
||||
entries[v].valid =1;
|
||||
total_cache+=entries[v].bytes;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -243,6 +254,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach
|
||||
for(int e=0;e<ncache;e++){
|
||||
if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
|
||||
entries[e].valid = 0;
|
||||
total_cache-=bytes;
|
||||
return entries[e].address;
|
||||
}
|
||||
}
|
||||
|
@ -93,8 +93,8 @@ private:
|
||||
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
|
||||
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
|
||||
|
||||
static void PrintBytes(void);
|
||||
public:
|
||||
static void PrintBytes(void);
|
||||
static void Init(void);
|
||||
static void InitMessage(void);
|
||||
static void *AcceleratorAllocate(size_t bytes);
|
||||
|
@ -138,6 +138,21 @@ public:
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
|
||||
double StencilSendToRecvFrom(void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
|
@ -77,6 +77,15 @@ void CartesianCommunicator::GlobalSumVector(uint64_t *,int N){}
|
||||
void CartesianCommunicator::GlobalXOR(uint32_t &){}
|
||||
void CartesianCommunicator::GlobalXOR(uint64_t &){}
|
||||
|
||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
// Basic Halo comms primitive -- should never call in single node
|
||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
@ -87,6 +96,20 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
bcopy(in,out,bytes*words);
|
||||
@ -114,6 +137,10 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
|
||||
int recv_from_rank,
|
||||
int bytes, int dir)
|
||||
{
|
||||
std::vector<CommsRequest_t> list;
|
||||
// Discard the "dir"
|
||||
SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||
SendToRecvFromComplete(list);
|
||||
return 2.0*bytes;
|
||||
}
|
||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
@ -123,10 +150,13 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
int recv_from_rank,
|
||||
int bytes, int dir)
|
||||
{
|
||||
// Discard the "dir"
|
||||
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||
return 2.0*bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::StencilBarrier(void){};
|
||||
|
@ -32,9 +32,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#ifdef GRID_CUDA
|
||||
#include <cuda_runtime_api.h>
|
||||
#endif
|
||||
#ifdef GRID_HIP
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
#define header "SharedMemoryMpi: "
|
||||
@ -428,7 +425,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#if defined(GRID_CUDA) ||defined(GRID_HIP)
|
||||
#ifdef GRID_CUDA
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
void * ShmCommBuf ;
|
||||
@ -451,15 +448,21 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Each MPI rank should allocate our own buffer
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
ShmCommBuf = acceleratorAllocDevice(bytes);
|
||||
|
||||
#ifndef GRID_MPI3_SHM_NONE
|
||||
auto err = cudaMalloc(&ShmCommBuf, bytes);
|
||||
#else
|
||||
auto err = cudaMallocManaged(&ShmCommBuf, bytes);
|
||||
#endif
|
||||
if ( err != cudaSuccess) {
|
||||
std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (ShmCommBuf == (void *)NULL ) {
|
||||
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
||||
std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if ( WorldRank == 0 ){
|
||||
std::cout << header " SharedMemoryMPI.cc cudaMalloc "<< bytes
|
||||
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
|
||||
std::cout << header " SharedMemoryMPI.cc cudaMalloc "<< bytes << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
|
||||
}
|
||||
SharedMemoryZero(ShmCommBuf,bytes);
|
||||
|
||||
@ -472,26 +475,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
//////////////////////////////////////////////////
|
||||
// If it is me, pass around the IPC access key
|
||||
//////////////////////////////////////////////////
|
||||
#ifdef GRID_CUDA
|
||||
cudaIpcMemHandle_t handle;
|
||||
|
||||
if ( r==WorldShmRank ) {
|
||||
auto err = cudaIpcGetMemHandle(&handle,ShmCommBuf);
|
||||
err = cudaIpcGetMemHandle(&handle,ShmCommBuf);
|
||||
if ( err != cudaSuccess) {
|
||||
std::cerr << " SharedMemoryMPI.cc cudaIpcGetMemHandle failed for rank" << r <<" "<<cudaGetErrorString(err)<< std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef GRID_HIP
|
||||
hipIpcMemHandle_t handle;
|
||||
if ( r==WorldShmRank ) {
|
||||
auto err = hipIpcGetMemHandle(&handle,ShmCommBuf);
|
||||
if ( err != hipSuccess) {
|
||||
std::cerr << " SharedMemoryMPI.cc hipIpcGetMemHandle failed for rank" << r <<" "<<hipGetErrorString(err)<< std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
//////////////////////////////////////////////////
|
||||
// Share this IPC handle across the Shm Comm
|
||||
//////////////////////////////////////////////////
|
||||
@ -508,24 +500,13 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
// If I am not the source, overwrite thisBuf with remote buffer
|
||||
///////////////////////////////////////////////////////////////
|
||||
void * thisBuf = ShmCommBuf;
|
||||
#ifdef GRID_CUDA
|
||||
if ( r!=WorldShmRank ) {
|
||||
auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess);
|
||||
err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess);
|
||||
if ( err != cudaSuccess) {
|
||||
std::cerr << " SharedMemoryMPI.cc cudaIpcOpenMemHandle failed for rank" << r <<" "<<cudaGetErrorString(err)<< std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef GRID_HIP
|
||||
if ( r!=WorldShmRank ) {
|
||||
auto err = hipIpcOpenMemHandle(&thisBuf,handle,hipIpcMemLazyEnablePeerAccess);
|
||||
if ( err != hipSuccess) {
|
||||
std::cerr << " SharedMemoryMPI.cc hipIpcOpenMemHandle failed for rank" << r <<" "<<hipGetErrorString(err)<< std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Save a copy of the device buffers
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/lattice/Lattice_local.h>
|
||||
#include <Grid/lattice/Lattice_reduction.h>
|
||||
#include <Grid/lattice/Lattice_peekpoke.h>
|
||||
//#include <Grid/lattice/Lattice_reality.h>
|
||||
#include <Grid/lattice/Lattice_reality.h>
|
||||
#include <Grid/lattice/Lattice_real_imag.h>
|
||||
#include <Grid/lattice/Lattice_comparison_utils.h>
|
||||
#include <Grid/lattice/Lattice_comparison.h>
|
||||
|
@ -60,9 +60,9 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
auto rhs_t=rhs_v(ss);
|
||||
auto tmp =ret_v(ss);
|
||||
mac(&tmp,&lhs_t,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
@ -124,7 +124,7 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
auto tmp =ret_v(ss);
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
mac(&tmp,&lhs_t,&rhs);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
@ -182,7 +182,7 @@ void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( rhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
auto tmp =ret_v(ss);
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
mac(&tmp,&lhs,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
|
@ -2,13 +2,12 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#ifdef GRID_HIP
|
||||
extern hipDeviceProp_t *gpu_props;
|
||||
#define WARP_SIZE 64
|
||||
#endif
|
||||
#ifdef GRID_CUDA
|
||||
extern cudaDeviceProp *gpu_props;
|
||||
#define WARP_SIZE 32
|
||||
#endif
|
||||
|
||||
#define WARP_SIZE 32
|
||||
__device__ unsigned int retirementCount = 0;
|
||||
|
||||
template <class Iterator>
|
||||
@ -65,7 +64,7 @@ __device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid
|
||||
|
||||
// cannot use overloaded operators for sobj as they are not volatile-qualified
|
||||
memcpy((void *)&sdata[tid], (void *)&mySum, sizeof(sobj));
|
||||
acceleratorSynchronise();
|
||||
__syncwarp();
|
||||
|
||||
const Iterator VEC = WARP_SIZE;
|
||||
const Iterator vid = tid & (VEC-1);
|
||||
@ -79,9 +78,9 @@ __device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid
|
||||
beta += temp;
|
||||
memcpy((void *)&sdata[tid], (void *)&beta, sizeof(sobj));
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
__syncwarp();
|
||||
}
|
||||
acceleratorSynchroniseAll();
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x == 0) {
|
||||
beta = Zero();
|
||||
@ -91,7 +90,7 @@ __device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid
|
||||
}
|
||||
memcpy((void *)&sdata[0], (void *)&beta, sizeof(sobj));
|
||||
}
|
||||
acceleratorSynchroniseAll();
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
|
||||
|
@ -52,6 +52,7 @@ public:
|
||||
// This will be safe to call from accelerator_for and is trivially copy constructible
|
||||
// The copy constructor for this will need to be used by device lambda functions
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
#undef LATTICE_BOUNDS_CHECK
|
||||
template<class vobj>
|
||||
class LatticeView : public LatticeAccelerator<vobj>
|
||||
{
|
||||
@ -61,14 +62,36 @@ public:
|
||||
void * cpu_ptr;
|
||||
#ifdef GRID_SIMT
|
||||
accelerator_inline const typename vobj::scalar_object operator()(size_t i) const {
|
||||
#ifdef LATTICE_BOUNDS_CHECK
|
||||
assert(i<this->_odata_size);
|
||||
assert(i>=0);
|
||||
#endif
|
||||
return coalescedRead(this->_odata[i]);
|
||||
}
|
||||
#else
|
||||
accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; }
|
||||
accelerator_inline const vobj & operator()(size_t i) const {
|
||||
#ifdef LATTICE_BOUNDS_CHECK
|
||||
assert(i<this->_odata_size);
|
||||
assert(i>=0);
|
||||
#endif
|
||||
return this->_odata[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
|
||||
accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; };
|
||||
accelerator_inline const vobj & operator[](size_t i) const {
|
||||
#ifdef LATTICE_BOUNDS_CHECK
|
||||
assert(i<this->_odata_size);
|
||||
assert(i>=0);
|
||||
#endif
|
||||
return this->_odata[i];
|
||||
};
|
||||
accelerator_inline vobj & operator[](size_t i) {
|
||||
#ifdef LATTICE_BOUNDS_CHECK
|
||||
assert(i<this->_odata_size);
|
||||
assert(i>=0);
|
||||
#endif
|
||||
return this->_odata[i];
|
||||
};
|
||||
|
||||
accelerator_inline uint64_t begin(void) const { return 0;};
|
||||
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
|
||||
|
@ -130,8 +130,6 @@ public:
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
if ( log.active ) {
|
||||
std::ios_base::fmtflags f(stream.flags());
|
||||
|
||||
stream << log.background()<< std::left;
|
||||
if (log.topWidth > 0)
|
||||
{
|
||||
@ -154,8 +152,6 @@ public:
|
||||
<< now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
stream.flags(f);
|
||||
|
||||
return stream;
|
||||
} else {
|
||||
return devnull;
|
||||
|
@ -77,9 +77,16 @@ const int SpinorIndex = 2;
|
||||
template<typename T> struct isSpinor {
|
||||
static constexpr bool value = (SpinorIndex==T::TensorLevel);
|
||||
};
|
||||
const int CoarseIndex = 4;
|
||||
template<typename T> struct isCoarsened {
|
||||
static constexpr bool value = (CoarseIndex<=T::TensorLevel);
|
||||
};
|
||||
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
||||
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
||||
|
||||
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
|
||||
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
|
||||
|
||||
// ChrisK very keen to add extra space for Gparity doubling.
|
||||
//
|
||||
// Also add domain wall index, in a way where Wilson operator
|
||||
|
@ -89,7 +89,8 @@ public:
|
||||
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
|
||||
virtual std::vector<int> Directions(void) =0;
|
||||
virtual std::vector<int> Displacements(void)=0;
|
||||
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
|
||||
|
||||
|
@ -44,6 +44,9 @@ public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef StaggeredKernels<Impl> Kernels;
|
||||
|
||||
virtual std::vector<int> Directions(void) { return this->directions; };
|
||||
virtual std::vector<int> Displacements(void){ return this->displacements;};
|
||||
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
|
@ -49,6 +49,9 @@ public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef StaggeredKernels<Impl> Kernels;
|
||||
|
||||
virtual std::vector<int> Directions(void) { return this->directions; };
|
||||
virtual std::vector<int> Displacements(void){ return this->displacements;};
|
||||
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
|
@ -47,6 +47,9 @@ public:
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
virtual std::vector<int> Directions(void) { return this->directions; };
|
||||
virtual std::vector<int> Displacements(void){ return this->displacements;};
|
||||
|
||||
////////////////////////////////////////
|
||||
// Performance monitoring
|
||||
////////////////////////////////////////
|
||||
|
@ -63,20 +63,17 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic Nc kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Naik>
|
||||
static accelerator_inline
|
||||
template<int Naik> accelerator_inline
|
||||
void DhopSiteGeneric(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
|
||||
template<int Naik> static accelerator_inline
|
||||
template<int Naik> accelerator_inline
|
||||
void DhopSiteGenericInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
|
||||
template<int Naik> static accelerator_inline
|
||||
template<int Naik> accelerator_inline
|
||||
void DhopSiteGenericExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
@ -85,20 +82,17 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int Naik> static accelerator_inline
|
||||
template<int Naik> accelerator_inline
|
||||
void DhopSiteHand(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
|
||||
template<int Naik> static accelerator_inline
|
||||
template<int Naik> accelerator_inline
|
||||
void DhopSiteHandInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
|
||||
template<int Naik> static accelerator_inline
|
||||
template<int Naik> accelerator_inline
|
||||
void DhopSiteHandExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
@ -107,7 +101,6 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Asm Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void DhopSiteAsm(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
|
@ -63,6 +63,9 @@ public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef WilsonKernels<Impl> Kernels;
|
||||
|
||||
virtual std::vector<int> Directions(void) { return this->directions; };
|
||||
virtual std::vector<int> Displacements(void){ return this->displacements;};
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -72,6 +72,9 @@ public:
|
||||
typedef WilsonKernels<Impl> Kernels;
|
||||
PmuStat stat;
|
||||
|
||||
virtual std::vector<int> Directions(void) { return this->directions; };
|
||||
virtual std::vector<int> Displacements(void){ return this->displacements;};
|
||||
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
|
@ -79,6 +79,8 @@ public:
|
||||
_Mat.M(in,tmp);
|
||||
G5R5(out,tmp);
|
||||
}
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
};
|
||||
|
||||
|
||||
@ -127,6 +129,8 @@ public:
|
||||
_Mat.M(in,tmp);
|
||||
out=g5*tmp;
|
||||
}
|
||||
virtual std::vector<int> Directions(void) { return _Mat.Directions();};
|
||||
virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -799,7 +799,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
|
||||
PropagatorField tmp(UGrid);
|
||||
PropagatorField Utmp(UGrid);
|
||||
PropagatorField zz (UGrid); zz=0.0;
|
||||
LatticeInteger zz (UGrid); zz=0.0;
|
||||
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
|
||||
for (int s=0;s<Ls;s++) {
|
||||
|
||||
@ -850,7 +850,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField tmp(UGrid);
|
||||
PropagatorField Utmp(UGrid);
|
||||
|
||||
PropagatorField zz (UGrid); zz=0.0;
|
||||
LatticeInteger zz (UGrid); zz=0.0;
|
||||
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
|
@ -146,7 +146,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
template <class Impl>
|
||||
template <int Naik> accelerator_inline
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
@ -221,7 +221,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
|
||||
|
||||
|
||||
template <class Impl>
|
||||
template <int Naik> accelerator_inline
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
@ -300,7 +300,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
|
||||
|
||||
|
||||
template <class Impl>
|
||||
template <int Naik> accelerator_inline
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
|
@ -78,7 +78,7 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
||||
// Int, Ext, Int+Ext cases for comms overlap
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
template <int Naik> accelerator_inline
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
@ -126,7 +126,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
||||
// Only contributions from interior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
template <int Naik> accelerator_inline
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
@ -174,7 +174,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
||||
// Only contributions from exterior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
template <int Naik> accelerator_inline
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
@ -224,7 +224,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Driving / wrapping routine to select right kernel
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp)
|
||||
{
|
||||
@ -253,7 +253,7 @@ void StaggeredKernels<Impl>::DhopDirKernel(StencilImpl &st, DoubledGaugeFieldVie
|
||||
ThisKernel::A(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \
|
||||
});
|
||||
|
||||
template <class Impl>
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior)
|
||||
@ -293,7 +293,7 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
||||
}
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
}
|
||||
template <class Impl>
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior)
|
||||
|
@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
||||
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
||||
|
||||
pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
|
||||
pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
|
||||
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
|
||||
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
||||
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
|
||||
pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
|
||||
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
|
||||
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -646,7 +646,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
||||
template<> accelerator_inline void \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
@ -662,7 +662,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> accelerator_inline void \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
@ -678,7 +678,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> accelerator_inline void \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
@ -694,7 +694,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> accelerator_inline void \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
@ -710,7 +710,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> accelerator_inline void \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
@ -727,7 +727,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
template<> accelerator_inline void \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
|
@ -495,7 +495,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Impl> accelerator_inline void
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
@ -519,7 +519,7 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> accelerator_inline
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
@ -542,7 +542,7 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> accelerator_inline void
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
@ -566,7 +566,7 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> accelerator_inline
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
@ -589,7 +589,7 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> accelerator_inline void
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
@ -614,7 +614,7 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
|
||||
HAND_RESULT_EXT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> accelerator_inline
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
|
@ -114,7 +114,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// All legs kernels ; comms then compute
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl> accelerator_inline
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
@ -140,7 +140,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldV
|
||||
coalescedWrite(out[sF],result,lane);
|
||||
};
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
@ -169,7 +169,7 @@ void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Interior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl> accelerator_inline
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
@ -197,7 +197,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFi
|
||||
coalescedWrite(out[sF], result,lane);
|
||||
};
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
@ -227,7 +227,7 @@ void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeField
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Exterior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl> accelerator_inline
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
@ -258,7 +258,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
@ -290,7 +290,7 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeField
|
||||
};
|
||||
|
||||
#define DhopDirMacro(Dir,spProj,spRecon) \
|
||||
template <class Impl> accelerator_inline \
|
||||
template <class Impl> \
|
||||
void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
|
||||
{ \
|
||||
@ -318,7 +318,7 @@ DhopDirMacro(Ym,spProjYm,spReconYm);
|
||||
DhopDirMacro(Zm,spProjZm,spReconZm);
|
||||
DhopDirMacro(Tm,spProjTm,spReconTm);
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
|
||||
{
|
||||
|
@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
hspin(0)=fspin(0)-fspin(2);
|
||||
hspin(1)=fspin(1)-fspin(3);
|
||||
}
|
||||
@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
* 0 0 -1 0
|
||||
* 0 0 0 -1
|
||||
*/
|
||||
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
hspin(0)=fspin(0);
|
||||
hspin(1)=fspin(1);
|
||||
}
|
||||
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
hspin(0)=fspin(2);
|
||||
hspin(1)=fspin(3);
|
||||
}
|
||||
|
||||
// template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
rfspin(0)=fspin(0);
|
||||
rfspin(1)=fspin(1);
|
||||
rfspin(2)=Zero();
|
||||
rfspin(3)=Zero();
|
||||
}
|
||||
// template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
rfspin(0)=Zero();
|
||||
rfspin(1)=Zero();
|
||||
rfspin(2)=fspin(2);
|
||||
rfspin(3)=fspin(3);
|
||||
}
|
||||
|
||||
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
const int hN = N>>1;
|
||||
for(int s=0;s<hN;s++){
|
||||
rfspin(s)=fspin(s);
|
||||
rfspin(s+hN)=Zero();
|
||||
}
|
||||
}
|
||||
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
const int hN = N>>1;
|
||||
for(int s=0;s<hN;s++){
|
||||
rfspin(s)=Zero();
|
||||
rfspin(s+hN)=fspin(s+hN);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Reconstruction routines to move back again to four spin
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
*/
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)=timesMinusI(hspin(1));
|
||||
@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)=timesI(hspin(1));
|
||||
@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)-=timesI(hspin(1));
|
||||
@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)+=timesI(hspin(1));
|
||||
@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)= hspin(1);
|
||||
@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)=-hspin(1);
|
||||
@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)+=hspin(1);
|
||||
@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)-=hspin(1);
|
||||
@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
*/
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)=timesMinusI(hspin(0));
|
||||
@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)= timesI(hspin(0));
|
||||
@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)-=timesI(hspin(0));
|
||||
@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)+=timesI(hspin(0));
|
||||
@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
*/
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)=hspin(0);
|
||||
@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0);
|
||||
fspin(1)=hspin(1);
|
||||
fspin(2)=-hspin(0);
|
||||
@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)+=hspin(0);
|
||||
@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0);
|
||||
fspin(1)+=hspin(1);
|
||||
fspin(2)-=hspin(0);
|
||||
@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
*/
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
|
||||
fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
|
||||
fspin(2)=Zero();
|
||||
@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)=Zero();
|
||||
fspin(1)=Zero();
|
||||
fspin(2)=hspin(0)+hspin(0);
|
||||
@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
fspin(0)+=hspin(0)+hspin(0);
|
||||
fspin(1)+=hspin(1)+hspin(1);
|
||||
}
|
||||
@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
||||
//////////
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjXp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
|
||||
////////
|
||||
// Xm
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProjXm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjXm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spReconXm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spReconXm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumReconXm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumReconXm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
|
||||
////////
|
||||
// Yp
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProjYp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjYp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spReconYp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spReconYp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumReconYp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumReconYp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
////////
|
||||
// Ym
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProjYm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjYm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spReconYm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spReconYm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumReconYm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumReconYm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -638,66 +594,57 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProjZp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjZp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spReconZp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spReconZp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}}
|
||||
}
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumReconZp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumReconZp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
@ -706,62 +653,53 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProjZm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjZm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spReconZm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spReconZm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}}
|
||||
}
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumReconZm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumReconZm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -774,41 +712,35 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProjTp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjTp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spReconTp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spReconTp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumReconTp(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumReconTp(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
////////
|
||||
// Tm
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProjTm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProjTm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spReconTm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spReconTm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumReconTm(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumReconTm(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
////////
|
||||
// 5p
|
||||
////////
|
||||
template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProj5p(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProj5p(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spRecon5p(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spRecon5p(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumRecon5p(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumRecon5p(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM
|
||||
}
|
||||
|
||||
// four spinor projectors for chiral proj
|
||||
// template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
|
||||
template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
|
||||
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProj5p(hspin._internal,fspin._internal);
|
||||
}
|
||||
// template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
|
||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProj5p(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
// template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
|
||||
template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &
|
||||
// 5m
|
||||
////////
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
spProj5m(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
for(int i=0;i<N;i++) {
|
||||
spProj5m(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
@ -1021,40 +924,34 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spRecon5m(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spRecon5m(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
}}
|
||||
}}
|
||||
}
|
||||
|
||||
template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
accumRecon5m(hspin._internal,fspin._internal);
|
||||
}
|
||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
accumRecon5m(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM
|
||||
|
||||
|
||||
// four spinor projectors for chiral proj
|
||||
// template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
|
||||
template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
|
||||
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
|
||||
spProj5m(hspin._internal,fspin._internal);
|
||||
}
|
||||
// template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
|
||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++) {
|
||||
spProj5m(hspin._internal[i],fspin._internal[i]);
|
||||
}
|
||||
}
|
||||
// template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
|
||||
template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
|
||||
{
|
||||
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
|
||||
|
@ -154,8 +154,8 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co
|
||||
accelerator_for(sss,nloop,vobj::Nsimd(),{
|
||||
uint64_t ss = sss*Ls;
|
||||
decltype(coalescedRead(y_v[ss+sp])) tmp;
|
||||
spProj5m(tmp,y_v(ss+sp));
|
||||
tmp = a*x_v(ss+s)+b*tmp;
|
||||
spProj5m(tmp,y_v(ss+sp));
|
||||
tmp = a*x_v(ss+s)+b*tmp;
|
||||
coalescedWrite(z_v[ss+s],tmp);
|
||||
});
|
||||
}
|
||||
@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
|
||||
z.Checkerboard() = x.Checkerboard();
|
||||
conformable(x,z);
|
||||
int Ls = grid->_rdimensions[0];
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
autoView( x_v, x, AcceleratorRead);
|
||||
autoView( z_v, z, AcceleratorWrite);
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
|
||||
uint64_t ss = sss*Ls;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sp = Ls-1-s;
|
||||
coalescedWrite(z_v[ss+sp],G5*x_v(ss+s));
|
||||
auto tmp = x_v(ss+s);
|
||||
decltype(tmp) tmp_p;
|
||||
decltype(tmp) tmp_m;
|
||||
spProj5p(tmp_p,tmp);
|
||||
spProj5m(tmp_m,tmp);
|
||||
// Use of spProj5m, 5p captures the coarse space too
|
||||
coalescedWrite(z_v[ss+sp],tmp_p - tmp_m);
|
||||
}
|
||||
});
|
||||
}
|
||||
@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
|
||||
z.Checkerboard() = x.Checkerboard();
|
||||
conformable(x, z);
|
||||
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
z = G5 * x;
|
||||
autoView( x_v, x, AcceleratorRead);
|
||||
autoView( z_v, z, AcceleratorWrite);
|
||||
uint64_t nloop = grid->oSites();
|
||||
accelerator_for(ss,nloop,vobj::Nsimd(),{
|
||||
auto tmp = x_v(ss);
|
||||
decltype(tmp) tmp_p;
|
||||
decltype(tmp) tmp_m;
|
||||
spProj5p(tmp_p,tmp);
|
||||
spProj5m(tmp_m,tmp);
|
||||
coalescedWrite(z_v[ss],tmp_p - tmp_m);
|
||||
});
|
||||
}
|
||||
|
||||
/*
|
||||
template<class CComplex, int nbasis>
|
||||
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
|
||||
{
|
||||
@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
|
||||
}
|
||||
});
|
||||
}
|
||||
*/
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -41,11 +41,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
|
||||
typedef struct { uint16_t x;} half;
|
||||
#endif
|
||||
typedef struct Half2_t { half x; half y; } Half2;
|
||||
|
||||
#define COALESCE_GRANULARITY ( GEN_SIMD_WIDTH )
|
||||
|
||||
template<class pair>
|
||||
@ -130,14 +125,14 @@ inline accelerator GpuVector<N,datum> operator/(const GpuVector<N,datum> l,const
|
||||
}
|
||||
|
||||
constexpr int NSIMD_RealH = COALESCE_GRANULARITY / sizeof(half);
|
||||
constexpr int NSIMD_ComplexH = COALESCE_GRANULARITY / sizeof(Half2);
|
||||
constexpr int NSIMD_ComplexH = COALESCE_GRANULARITY / sizeof(half2);
|
||||
constexpr int NSIMD_RealF = COALESCE_GRANULARITY / sizeof(float);
|
||||
constexpr int NSIMD_ComplexF = COALESCE_GRANULARITY / sizeof(float2);
|
||||
constexpr int NSIMD_RealD = COALESCE_GRANULARITY / sizeof(double);
|
||||
constexpr int NSIMD_ComplexD = COALESCE_GRANULARITY / sizeof(double2);
|
||||
constexpr int NSIMD_Integer = COALESCE_GRANULARITY / sizeof(Integer);
|
||||
|
||||
typedef GpuComplex<Half2 > GpuComplexH;
|
||||
typedef GpuComplex<half2 > GpuComplexH;
|
||||
typedef GpuComplex<float2 > GpuComplexF;
|
||||
typedef GpuComplex<double2> GpuComplexD;
|
||||
|
||||
@ -152,9 +147,11 @@ typedef GpuVector<NSIMD_Integer, Integer > GpuVectorI;
|
||||
accelerator_inline float half2float(half h)
|
||||
{
|
||||
float f;
|
||||
#if defined(GRID_CUDA) || defined(GRID_HIP)
|
||||
#ifdef GRID_SIMT
|
||||
f = __half2float(h);
|
||||
#else
|
||||
//f = __half2float(h);
|
||||
__half_raw hr(h);
|
||||
Grid_half hh;
|
||||
hh.x = hr.x;
|
||||
f= sfw_half_to_float(hh);
|
||||
@ -164,11 +161,13 @@ accelerator_inline float half2float(half h)
|
||||
accelerator_inline half float2half(float f)
|
||||
{
|
||||
half h;
|
||||
#if defined(GRID_CUDA) || defined(GRID_HIP)
|
||||
#ifdef GRID_SIMT
|
||||
h = __float2half(f);
|
||||
#else
|
||||
Grid_half hh = sfw_float_to_half(f);
|
||||
h.x = hh.x;
|
||||
__half_raw hr;
|
||||
hr.x = hh.x;
|
||||
h = __half(hr);
|
||||
#endif
|
||||
return h;
|
||||
}
|
||||
@ -524,7 +523,7 @@ namespace Optimization {
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Single / Half
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
static accelerator_inline GpuVectorCH StoH (GpuVectorCF a,GpuVectorCF b) {
|
||||
static accelerator_inline GpuVectorCH StoH (GpuVectorCF a,GpuVectorCF b) {
|
||||
int N = GpuVectorCF::N;
|
||||
GpuVectorCH h;
|
||||
for(int i=0;i<N;i++) {
|
||||
|
@ -55,7 +55,6 @@ void acceleratorInit(void)
|
||||
printf("AcceleratorCudaInit[%d]: ========================\n",rank);
|
||||
printf("AcceleratorCudaInit[%d]: Device identifier: %s\n",rank, prop.name);
|
||||
|
||||
|
||||
GPU_PROP_FMT(totalGlobalMem,"%lld");
|
||||
GPU_PROP(managedMemory);
|
||||
GPU_PROP(isMultiGpuBoard);
|
||||
@ -110,24 +109,20 @@ void acceleratorInit(void)
|
||||
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
||||
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
||||
|
||||
printf("world_rank %d has %d devices\n",world_rank,nDevices);
|
||||
size_t totalDeviceMem=0;
|
||||
for (int i = 0; i < nDevices; i++) {
|
||||
|
||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
||||
|
||||
hipGetDeviceProperties(&gpu_props[i], i);
|
||||
hipDeviceProp_t prop;
|
||||
prop = gpu_props[i];
|
||||
totalDeviceMem = prop.totalGlobalMem;
|
||||
if ( world_rank == 0) {
|
||||
hipDeviceProp_t prop;
|
||||
prop = gpu_props[i];
|
||||
printf("AcceleratorHipInit: ========================\n");
|
||||
printf("AcceleratorHipInit: Device Number : %d\n", i);
|
||||
printf("AcceleratorHipInit: ========================\n");
|
||||
printf("AcceleratorHipInit: Device identifier: %s\n", prop.name);
|
||||
|
||||
GPU_PROP_FMT(totalGlobalMem,"%lu");
|
||||
// GPU_PROP(managedMemory);
|
||||
GPU_PROP(isMultiGpuBoard);
|
||||
GPU_PROP(warpSize);
|
||||
@ -136,7 +131,6 @@ void acceleratorInit(void)
|
||||
// GPU_PROP(singleToDoublePrecisionPerfRatio);
|
||||
}
|
||||
}
|
||||
MemoryManager::DeviceMaxBytes = (8*totalDeviceMem)/10; // Assume 80% ours
|
||||
#undef GPU_PROP_FMT
|
||||
#undef GPU_PROP
|
||||
#ifdef GRID_IBM_SUMMIT
|
||||
|
@ -151,6 +151,9 @@ inline void *acceleratorAllocShared(size_t bytes)
|
||||
ptr = (void *) NULL;
|
||||
printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
|
||||
}
|
||||
// size_t free,total;
|
||||
// cudaMemGetInfo(&free,&total);
|
||||
// std::cout << "Malloc managed "<<bytes<<" "<<free<<"/"<<total<<std::endl;
|
||||
return ptr;
|
||||
};
|
||||
inline void *acceleratorAllocDevice(size_t bytes)
|
||||
@ -161,6 +164,9 @@ inline void *acceleratorAllocDevice(size_t bytes)
|
||||
ptr = (void *) NULL;
|
||||
printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
|
||||
}
|
||||
// size_t free,total;
|
||||
// cudaMemGetInfo(&free,&total);
|
||||
// std::cout << "Malloc device "<<bytes<<" "<<free<<"/"<<total<<std::endl;
|
||||
return ptr;
|
||||
};
|
||||
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
|
||||
@ -307,13 +313,17 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
||||
|
||||
inline void *acceleratorAllocShared(size_t bytes)
|
||||
{
|
||||
#if 0
|
||||
void *ptr=NULL;
|
||||
auto err = hipMallocManaged((void **)&ptr,bytes);
|
||||
if( err != hipSuccess ) {
|
||||
ptr = (void *) NULL;
|
||||
printf(" hipMallocManaged failed for %ld %s \n",bytes,hipGetErrorString(err));
|
||||
printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err));
|
||||
}
|
||||
return ptr;
|
||||
#else
|
||||
return malloc(bytes);
|
||||
#endif
|
||||
};
|
||||
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
||||
|
||||
@ -323,7 +333,7 @@ inline void *acceleratorAllocDevice(size_t bytes)
|
||||
auto err = hipMalloc((void **)&ptr,bytes);
|
||||
if( err != hipSuccess ) {
|
||||
ptr = (void *) NULL;
|
||||
printf(" hipMalloc failed for %ld %s \n",bytes,hipGetErrorString(err));
|
||||
printf(" hipMalloc failed for %d %s \n",bytes,hipGetErrorString(err));
|
||||
}
|
||||
return ptr;
|
||||
};
|
||||
|
@ -14,62 +14,35 @@ std::string filestem(const int l)
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
int64_t threads = GridThread::GetThreads();
|
||||
auto mpi = GridDefaultMpi();
|
||||
std::vector<int> latt;
|
||||
|
||||
int64_t threads = GridThread::GetThreads();
|
||||
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
|
||||
MSG << "MPI partition " << mpi << std::endl;
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark std write" << std::endl;
|
||||
MSG << "Benchmark Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
auto mpi = GridDefaultMpi();
|
||||
std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
writeBenchmark<LatticeFermion>(latt, filestem(l), stdWrite<LatticeFermion>);
|
||||
}
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark std read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
readBenchmark<LatticeFermion>(latt, filestem(l), stdRead<LatticeFermion>);
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
std::cout << "-- Local volume " << l << "^4" << std::endl;
|
||||
writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
|
||||
}
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime read" << std::endl;
|
||||
MSG << "Benchmark Lime read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
auto mpi = GridDefaultMpi();
|
||||
std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
std::cout << "-- Local volume " << l << "^4" << std::endl;
|
||||
readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
|
||||
}
|
||||
#endif
|
||||
|
||||
Grid_finalize();
|
||||
|
||||
#endif
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -14,140 +14,13 @@ using WriterFn = std::function<void(const std::string, Field &)> ;
|
||||
template <typename Field>
|
||||
using ReaderFn = std::function<void(Field &, const std::string)>;
|
||||
|
||||
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
|
||||
//
|
||||
// template <typename Field>
|
||||
// void stdWrite(const std::string filestem, Field &vec)
|
||||
// {
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
||||
// size_t size;
|
||||
// uint32_t crc;
|
||||
// GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
// autoView(vec_v, vec, CpuRead);
|
||||
// crcWatch.Start();
|
||||
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
// std::fwrite(&crc, sizeof(uint32_t), 1, file);
|
||||
// crcWatch.Stop();
|
||||
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||
// ioWatch.Start();
|
||||
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||
// ioWatch.Stop();
|
||||
// std::fclose(file);
|
||||
// size *= vec.Grid()->ProcessorCount();
|
||||
// MSG << "Std I/O write: Wrote " << size << " bytes in " << ioWatch.Elapsed()
|
||||
// << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6)
|
||||
// << " MB/s" << std::endl;
|
||||
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
// }
|
||||
//
|
||||
// template <typename Field>
|
||||
// void stdRead(Field &vec, const std::string filestem)
|
||||
// {
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
||||
// size_t size;
|
||||
// uint32_t crcRead, crcData;
|
||||
// GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
// crcWatch.Start();
|
||||
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
|
||||
// crcWatch.Stop();
|
||||
// {
|
||||
// autoView(vec_v, vec, CpuWrite);
|
||||
// ioWatch.Start();
|
||||
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||
// ioWatch.Stop();
|
||||
// std::fclose(file);
|
||||
// }
|
||||
// {
|
||||
// autoView(vec_v, vec, CpuRead);
|
||||
// crcWatch.Start();
|
||||
// crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
// crcWatch.Stop();
|
||||
// }
|
||||
// MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
|
||||
// assert(crcData == crcRead);
|
||||
// size *= vec.Grid()->ProcessorCount();
|
||||
// MSG << "Std I/O read: Read " << size << " bytes in " << ioWatch.Elapsed()
|
||||
// << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6)
|
||||
// << " MB/s" << std::endl;
|
||||
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
// }
|
||||
|
||||
template <typename Field>
|
||||
void stdWrite(const std::string filestem, Field &vec)
|
||||
{
|
||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
|
||||
size_t size, sizec;
|
||||
uint32_t crc;
|
||||
GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
sizec = size/sizeof(char); // just in case of...
|
||||
autoView(vec_v, vec, CpuRead);
|
||||
crcWatch.Start();
|
||||
crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
file.write(reinterpret_cast<char *>(&crc), sizeof(uint32_t)/sizeof(char));
|
||||
crcWatch.Stop();
|
||||
MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||
ioWatch.Start();
|
||||
file.write(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
|
||||
file.flush();
|
||||
ioWatch.Stop();
|
||||
size *= vec.Grid()->ProcessorCount();
|
||||
MSG << "Std I/O write: Wrote " << size << " bytes in " << ioWatch.Elapsed()
|
||||
<< ", " << size/1024./1024./(ioWatch.useconds()/1.e6)
|
||||
<< " MB/s" << std::endl;
|
||||
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
}
|
||||
|
||||
template <typename Field>
|
||||
void stdRead(Field &vec, const std::string filestem)
|
||||
{
|
||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
|
||||
size_t size, sizec;
|
||||
uint32_t crcRead, crcData;
|
||||
GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
sizec = size/sizeof(char); // just in case of...
|
||||
crcWatch.Start();
|
||||
file.read(reinterpret_cast<char *>(&crcRead), sizeof(uint32_t)/sizeof(char));
|
||||
crcWatch.Stop();
|
||||
{
|
||||
autoView(vec_v, vec, CpuWrite);
|
||||
ioWatch.Start();
|
||||
file.read(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
|
||||
ioWatch.Stop();
|
||||
}
|
||||
{
|
||||
autoView(vec_v, vec, CpuRead);
|
||||
crcWatch.Start();
|
||||
crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
crcWatch.Stop();
|
||||
}
|
||||
MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
|
||||
assert(crcData == crcRead);
|
||||
size *= vec.Grid()->ProcessorCount();
|
||||
MSG << "Std I/O read: Read " << size << " bytes in " << ioWatch.Elapsed()
|
||||
<< ", " << size/1024./1024./(ioWatch.useconds()/1.e6)
|
||||
<< " MB/s" << std::endl;
|
||||
MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
}
|
||||
|
||||
template <typename Field>
|
||||
void limeWrite(const std::string filestem, Field &vec)
|
||||
{
|
||||
emptyUserRecord record;
|
||||
ScidacWriter binWriter(vec.Grid()->IsBoss());
|
||||
|
||||
binWriter.open(filestem + ".lime.bin");
|
||||
binWriter.open(filestem + ".bin");
|
||||
binWriter.writeScidacFieldRecord(vec, record);
|
||||
binWriter.close();
|
||||
}
|
||||
@ -158,7 +31,7 @@ void limeRead(Field &vec, const std::string filestem)
|
||||
emptyUserRecord record;
|
||||
ScidacReader binReader;
|
||||
|
||||
binReader.open(filestem + ".lime.bin");
|
||||
binReader.open(filestem + ".bin");
|
||||
binReader.readScidacFieldRecord(vec, record);
|
||||
binReader.close();
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ using namespace Grid;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
std::vector<std::string> dir;
|
||||
unsigned int Ls;
|
||||
bool rb;
|
||||
@ -33,71 +34,46 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
int64_t threads = GridThread::GetThreads();
|
||||
auto mpi = GridDefaultMpi();
|
||||
|
||||
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
|
||||
MSG << "MPI partition " << mpi << std::endl;
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid std write" << std::endl;
|
||||
MSG << "Benchmark double precision Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
stdWrite<LatticeFermion>, Ls, rb);
|
||||
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermion>, Ls, rb);
|
||||
}
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid std read" << std::endl;
|
||||
MSG << "Benchmark double precision Lime read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
stdRead<LatticeFermion>, Ls, rb);
|
||||
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermion>, Ls, rb);
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime write" << std::endl;
|
||||
MSG << "Benchmark single precision Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
limeWrite<LatticeFermion>, Ls, rb);
|
||||
writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
|
||||
}
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime read" << std::endl;
|
||||
MSG << "Benchmark single precision Lime read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
limeRead<LatticeFermion>, Ls, rb);
|
||||
readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
|
||||
}
|
||||
#endif
|
||||
|
||||
// MSG << SEP << std::endl;
|
||||
// MSG << "Benchmark single precision Lime write" << std::endl;
|
||||
// MSG << SEP << std::endl;
|
||||
// for (auto &d: dir)
|
||||
// {
|
||||
// MSG << "-- Directory " << d << std::endl;
|
||||
// writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
|
||||
// }
|
||||
|
||||
// MSG << SEP << std::endl;
|
||||
// MSG << "Benchmark single precision Lime read" << std::endl;
|
||||
// MSG << SEP << std::endl;
|
||||
// for (auto &d: dir)
|
||||
// {
|
||||
// MSG << "-- Directory " << d << std::endl;
|
||||
// readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
|
||||
// }
|
||||
|
||||
Grid_finalize();
|
||||
|
||||
#endif
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -36,12 +36,12 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
#define LMAX (40)
|
||||
#define LMAX (48)
|
||||
#define LMIN (8)
|
||||
#define LADD (8)
|
||||
|
||||
int64_t Nwarm=10;
|
||||
int64_t Nloop=100;
|
||||
int64_t Nwarm=50;
|
||||
int64_t Nloop=500;
|
||||
|
||||
Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
@ -118,41 +118,6 @@ int main (int argc, char ** argv)
|
||||
|
||||
}
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z=z+ x*y"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LADD){
|
||||
|
||||
Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
|
||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
|
||||
for(int64_t i=0;i<Nwarm;i++){
|
||||
z=z+x*y;
|
||||
}
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
z=z+x*y;
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
double bytes=4*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 mult(z,x,y)"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
@ -178,6 +143,7 @@ int main (int argc, char ** argv)
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
mult(z,x,y);
|
||||
// mac(z,x,y);
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
@ -225,7 +191,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LADD){
|
||||
@ -250,15 +216,16 @@ int main (int argc, char ** argv)
|
||||
|
||||
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<ncbytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
}
|
||||
}
|
||||
#if 1
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=LMIN;lat<=LMAX;lat+=LADD){
|
||||
@ -292,10 +259,11 @@ int main (int argc, char ** argv)
|
||||
tmult = tmult /Nloop;
|
||||
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
|
||||
time = time * 1000; // convert to NS for GB/s
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" <<ncbytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -187,8 +187,7 @@ int main (int argc, char ** argv)
|
||||
auto xx = coalescedRead(x_v[ss]);
|
||||
auto yy = coalescedRead(y_v[ss]);
|
||||
auto zz = coalescedRead(z_v[ss]);
|
||||
//zz = zz+xx*yy;
|
||||
mac(&zz,&xx,&yy);
|
||||
zz = zz+xx*yy;
|
||||
coalescedWrite(z_v[ss],zz);
|
||||
});
|
||||
}
|
||||
|
@ -1,76 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
awkscript='
|
||||
BEGIN{
|
||||
i = 0;
|
||||
print "local L,std read (MB/s),std write (MB/s),Grid Lime read (MB/s),Grid Lime write (MB/s)"
|
||||
}
|
||||
|
||||
/Benchmark std write/{
|
||||
i = 0;
|
||||
mode = "stdWrite";
|
||||
}
|
||||
|
||||
/Benchmark std read/{
|
||||
i = 0;
|
||||
mode = "stdRead"
|
||||
}
|
||||
|
||||
/Benchmark Grid C-Lime write/{
|
||||
i = 0;
|
||||
mode = "gridWrite";
|
||||
}
|
||||
|
||||
/Benchmark Grid C-Lime read/{
|
||||
i = 0;
|
||||
mode = "gridRead";
|
||||
}
|
||||
|
||||
/Local volume/{
|
||||
match($0, "[0-9]+\\^4");
|
||||
l[i] = substr($0, RSTART, RLENGTH-2);
|
||||
}
|
||||
|
||||
/MB\/s/{
|
||||
match($0, "[0-9.eE]+ MB/s");
|
||||
p = substr($0, RSTART, RLENGTH-5);
|
||||
if (mode == "stdWrite")
|
||||
{
|
||||
sw[i] = p;
|
||||
}
|
||||
else if (mode == "stdRead")
|
||||
{
|
||||
sr[i] = p;
|
||||
}
|
||||
else if (mode == "gridWrite")
|
||||
{
|
||||
gw[i] = p;
|
||||
}
|
||||
else if (mode == "gridRead")
|
||||
{
|
||||
gr[i] = p;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
END{
|
||||
s = 0
|
||||
for (a in l)
|
||||
{
|
||||
s++;
|
||||
}
|
||||
for (j = 0; j < s; j++)
|
||||
{
|
||||
printf("%s,%s,%s,%s,%s\n", l[j], sr[j], sw[j], gr[j], gw[j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
'
|
||||
|
||||
if (( $# != 1 )); then
|
||||
echo "usage: `basename $0` <log file>" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
LOG=$1
|
||||
|
||||
awk "${awkscript}" ${LOG}
|
@ -330,18 +330,12 @@ case ${CXXTEST} in
|
||||
fi
|
||||
;;
|
||||
hipcc)
|
||||
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||
CXXFLAGS="$CXXFLAGS -fno-strict-aliasing"
|
||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||
CXXLD=${CXX}
|
||||
if test $ac_openmp = yes; then
|
||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
||||
fi
|
||||
;;
|
||||
dpcpp)
|
||||
LDFLAGS="$LDFLAGS"
|
||||
CXXFLAGS="$CXXFLAGS"
|
||||
CXXLD=${CXX}
|
||||
;;
|
||||
*)
|
||||
CXXLD=${CXX}
|
||||
CXXFLAGS="$CXXFLAGS -fno-strict-aliasing"
|
||||
|
@ -184,19 +184,19 @@ Below are shown the `configure` script invocations for three recommended configu
|
||||
|
||||
This is the build for every day developing and debugging with Xcode. It uses the Xcode clang c++ compiler, without MPI, and defaults to double-precision. Xcode builds the `Debug` configuration with debug symbols for full debugging:
|
||||
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=double --prefix=$GridPre/Debug
|
||||
../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=double --prefix=$GridPre/GridDebug --enable-comms=none
|
||||
|
||||
#### 2. `Release`
|
||||
|
||||
Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`, except using single-precision (handy for validation):
|
||||
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=single --prefix=$GridPre/Release
|
||||
../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=single --prefix=$GridPre/GridRelease --enable-comms=none
|
||||
|
||||
#### 3. `MPIDebug`
|
||||
|
||||
Debug configuration with MPI:
|
||||
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --enable-precision=double --prefix=$GridPre/MPIDebug
|
||||
../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=double --prefix=$GridPre/GridMPIDebug --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx
|
||||
|
||||
### 5.3 Build Grid
|
||||
|
||||
|
@ -29,7 +29,91 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
void MomentumSpacePropagatorTest(RealD mass,RealD M5, LatticePropagator &prop)
|
||||
{
|
||||
// what type LatticeComplex
|
||||
GridBase *_grid = prop.Grid();
|
||||
|
||||
typedef LatticeFermion FermionField;
|
||||
typedef LatticePropagator PropagatorField;
|
||||
typedef typename FermionField::vector_type vector_type;
|
||||
typedef typename FermionField::scalar_type ScalComplex;
|
||||
typedef iSinglet<ScalComplex> Tcomplex;
|
||||
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
|
||||
Coordinate latt_size = _grid->_fdimensions;
|
||||
|
||||
PropagatorField num (_grid); num = Zero();
|
||||
|
||||
LatComplex sk(_grid); sk = Zero();
|
||||
LatComplex sk2(_grid); sk2= Zero();
|
||||
LatComplex W(_grid); W= Zero();
|
||||
LatComplex a(_grid); a= Zero();
|
||||
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||
LatComplex denom(_grid); denom= Zero();
|
||||
LatComplex cosha(_grid);
|
||||
LatComplex kmu(_grid);
|
||||
LatComplex Wea(_grid);
|
||||
LatComplex Wema(_grid);
|
||||
|
||||
ScalComplex ci(0.0,1.0);
|
||||
SpinColourMatrixD identity = ComplexD(1.0);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++) {
|
||||
|
||||
LatticeCoordinate(kmu,mu);
|
||||
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
|
||||
kmu = TwoPiL * kmu;
|
||||
// kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
||||
|
||||
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
||||
sk = sk + sin(kmu) *sin(kmu);
|
||||
|
||||
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*identity);
|
||||
|
||||
}
|
||||
|
||||
W = one - M5 + sk2;
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Cosh alpha -> alpha
|
||||
////////////////////////////////////////////
|
||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||
|
||||
// FIXME Need a Lattice acosh
|
||||
{
|
||||
autoView(cosha_v,cosha,CpuRead);
|
||||
autoView(a_v,a,CpuWrite);
|
||||
for(int idx=0;idx<_grid->lSites();idx++){
|
||||
Coordinate lcoor(Nd);
|
||||
Tcomplex cc;
|
||||
// RealD sgn;
|
||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
peekLocalSite(cc,cosha_v,lcoor);
|
||||
assert((double)real(cc)>=1.0);
|
||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
||||
pokeLocalSite(cc,a_v,lcoor);
|
||||
}}
|
||||
|
||||
Wea = ( exp( a) * abs(W) );
|
||||
Wema= ( exp(-a) * abs(W) );
|
||||
|
||||
num = num + ( one - Wema ) * mass * identity;
|
||||
denom= ( Wea - one ) + mass*mass * (one - Wema);
|
||||
prop = num/denom;
|
||||
}
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
@ -307,6 +391,17 @@ int main (int argc, char ** argv)
|
||||
RealD M5 =0.8;
|
||||
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5);
|
||||
|
||||
/////////////////// Test code for (1-m)^2 ///////////////
|
||||
LatticePropagatorD prop1(&GRID);
|
||||
LatticePropagatorD prop2(&GRID);
|
||||
LatticeComplexD ratio(&GRID);
|
||||
MomentumSpacePropagatorTest(0.0,M5,prop1);
|
||||
MomentumSpacePropagatorTest(0.3,M5,prop2);
|
||||
ratio=localNorm2(prop2);
|
||||
ratio=ratio/localNorm2(prop1);
|
||||
std::cout << ratio;
|
||||
/////////////////// Test code for (1-m)^2 factor ///////////////
|
||||
|
||||
// Momentum space prop
|
||||
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
|
||||
bool fiveD = false; //calculate 4d free propagator
|
||||
|
@ -565,8 +565,8 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " Running Multigrid "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
|
||||
BiCGSTAB<CoarseVector> CoarseMgridBiCGSTAB(0.01,1000);
|
||||
BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0,24);
|
||||
BiCGSTAB<CoarseVector> CoarseMgridBiCGSTAB(0.01,1000,false);
|
||||
BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0,24,false);
|
||||
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||
ZeroGuesser<LatticeFermion> FineZeroGuesser;
|
||||
|
||||
@ -590,5 +590,5 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
Grid_finalize();
|
||||
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user