mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'develop' into sycl
This commit is contained in:
commit
cdf0a04fc5
@ -252,19 +252,14 @@ public:
|
|||||||
///////////////////////
|
///////////////////////
|
||||||
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
||||||
|
|
||||||
RealD M (const CoarseVector &in, CoarseVector &out)
|
void M (const CoarseVector &in, CoarseVector &out)
|
||||||
{
|
{
|
||||||
|
|
||||||
conformable(_grid,in.Grid());
|
conformable(_grid,in.Grid());
|
||||||
conformable(in.Grid(),out.Grid());
|
conformable(in.Grid(),out.Grid());
|
||||||
|
|
||||||
// RealD Nin = norm2(in);
|
|
||||||
SimpleCompressor<siteVector> compressor;
|
SimpleCompressor<siteVector> compressor;
|
||||||
|
|
||||||
double comms_usec = -usecond();
|
|
||||||
Stencil.HaloExchange(in,compressor);
|
Stencil.HaloExchange(in,compressor);
|
||||||
comms_usec += usecond();
|
|
||||||
|
|
||||||
autoView( in_v , in, AcceleratorRead);
|
autoView( in_v , in, AcceleratorRead);
|
||||||
autoView( out_v , out, AcceleratorWrite);
|
autoView( out_v , out, AcceleratorWrite);
|
||||||
typedef LatticeView<Cobj> Aview;
|
typedef LatticeView<Cobj> Aview;
|
||||||
@ -278,12 +273,7 @@ public:
|
|||||||
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||||
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||||
|
|
||||||
GridStopWatch ArithmeticTimer;
|
|
||||||
int osites=Grid()->oSites();
|
int osites=Grid()->oSites();
|
||||||
// double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint;
|
|
||||||
// double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex);
|
|
||||||
double usecs =-usecond();
|
|
||||||
// assert(geom.npoint==9);
|
|
||||||
|
|
||||||
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
||||||
int ss = sss/nbasis;
|
int ss = sss/nbasis;
|
||||||
@ -310,18 +300,11 @@ public:
|
|||||||
}
|
}
|
||||||
coalescedWrite(out_v[ss](b),res);
|
coalescedWrite(out_v[ss](b),res);
|
||||||
});
|
});
|
||||||
usecs +=usecond();
|
|
||||||
|
|
||||||
double nrm_usec=-usecond();
|
|
||||||
RealD Nout= norm2(out);
|
|
||||||
nrm_usec+=usecond();
|
|
||||||
|
|
||||||
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
|
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
|
||||||
|
|
||||||
return Nout;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
RealD Mdag (const CoarseVector &in, CoarseVector &out)
|
void Mdag (const CoarseVector &in, CoarseVector &out)
|
||||||
{
|
{
|
||||||
if(hermitian) {
|
if(hermitian) {
|
||||||
// corresponds to Petrov-Galerkin coarsening
|
// corresponds to Petrov-Galerkin coarsening
|
||||||
@ -332,7 +315,6 @@ public:
|
|||||||
G5C(tmp, in);
|
G5C(tmp, in);
|
||||||
M(tmp, out);
|
M(tmp, out);
|
||||||
G5C(out, out);
|
G5C(out, out);
|
||||||
return norm2(out);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
void MdirComms(const CoarseVector &in)
|
void MdirComms(const CoarseVector &in)
|
||||||
@ -553,8 +535,6 @@ public:
|
|||||||
autoView( A_self , A[self_stencil], AcceleratorWrite);
|
autoView( A_self , A[self_stencil], AcceleratorWrite);
|
||||||
|
|
||||||
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
||||||
// if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });}
|
|
||||||
// accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); });
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,6 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class Field> class LinearOperatorBase {
|
template<class Field> class LinearOperatorBase {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Support for coarsening to a multigrid
|
// Support for coarsening to a multigrid
|
||||||
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
||||||
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
||||||
@ -94,7 +93,10 @@ public:
|
|||||||
_Mat.Mdag(in,out);
|
_Mat.Mdag(in,out);
|
||||||
}
|
}
|
||||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
_Mat.MdagM(in,out,n1,n2);
|
_Mat.MdagM(in,out);
|
||||||
|
ComplexD dot = innerProduct(in,out);
|
||||||
|
n1=real(dot);
|
||||||
|
n2=norm2(out);
|
||||||
}
|
}
|
||||||
void HermOp(const Field &in, Field &out){
|
void HermOp(const Field &in, Field &out){
|
||||||
_Mat.MdagM(in,out);
|
_Mat.MdagM(in,out);
|
||||||
@ -131,17 +133,14 @@ public:
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
_Mat.MdagM(in,out,n1,n2);
|
HermOp(in,out);
|
||||||
out = out + _shift*in;
|
ComplexD dot = innerProduct(in,out);
|
||||||
|
|
||||||
ComplexD dot;
|
|
||||||
dot= innerProduct(in,out);
|
|
||||||
n1=real(dot);
|
n1=real(dot);
|
||||||
n2=norm2(out);
|
n2=norm2(out);
|
||||||
}
|
}
|
||||||
void HermOp(const Field &in, Field &out){
|
void HermOp(const Field &in, Field &out){
|
||||||
RealD n1,n2;
|
_Mat.MdagM(in,out);
|
||||||
HermOpAndNorm(in,out,n1,n2);
|
out = out + _shift*in;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -170,7 +169,7 @@ public:
|
|||||||
_Mat.M(in,out);
|
_Mat.M(in,out);
|
||||||
}
|
}
|
||||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
_Mat.M(in,out);
|
HermOp(in,out);
|
||||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||||
n2=norm2(out);
|
n2=norm2(out);
|
||||||
}
|
}
|
||||||
@ -216,21 +215,24 @@ public:
|
|||||||
template<class Field>
|
template<class Field>
|
||||||
class SchurOperatorBase : public LinearOperatorBase<Field> {
|
class SchurOperatorBase : public LinearOperatorBase<Field> {
|
||||||
public:
|
public:
|
||||||
virtual RealD Mpc (const Field &in, Field &out) =0;
|
virtual void Mpc (const Field &in, Field &out) =0;
|
||||||
virtual RealD MpcDag (const Field &in, Field &out) =0;
|
virtual void MpcDag (const Field &in, Field &out) =0;
|
||||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
virtual void MpcDagMpc(const Field &in, Field &out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
tmp.Checkerboard() = in.Checkerboard();
|
tmp.Checkerboard() = in.Checkerboard();
|
||||||
ni=Mpc(in,tmp);
|
Mpc(in,tmp);
|
||||||
no=MpcDag(tmp,out);
|
MpcDag(tmp,out);
|
||||||
}
|
}
|
||||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
MpcDagMpc(in,out,n1,n2);
|
MpcDagMpc(in,out);
|
||||||
|
ComplexD dot= innerProduct(in,out);
|
||||||
|
n1=real(dot);
|
||||||
|
n2=norm2(out);
|
||||||
}
|
}
|
||||||
virtual void HermOp(const Field &in, Field &out){
|
virtual void HermOp(const Field &in, Field &out){
|
||||||
RealD n1,n2;
|
out.Checkerboard() = in.Checkerboard();
|
||||||
HermOpAndNorm(in,out,n1,n2);
|
MpcDagMpc(in,out);
|
||||||
}
|
}
|
||||||
void Op (const Field &in, Field &out){
|
void Op (const Field &in, Field &out){
|
||||||
Mpc(in,out);
|
Mpc(in,out);
|
||||||
@ -254,28 +256,24 @@ public:
|
|||||||
public:
|
public:
|
||||||
Matrix &_Mat;
|
Matrix &_Mat;
|
||||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||||
virtual RealD Mpc (const Field &in, Field &out) {
|
virtual void Mpc (const Field &in, Field &out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
tmp.Checkerboard() = !in.Checkerboard();
|
tmp.Checkerboard() = !in.Checkerboard();
|
||||||
//std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
|
|
||||||
|
|
||||||
_Mat.Meooe(in,tmp);
|
_Mat.Meooe(in,tmp);
|
||||||
_Mat.MooeeInv(tmp,out);
|
_Mat.MooeeInv(tmp,out);
|
||||||
_Mat.Meooe(out,tmp);
|
_Mat.Meooe(out,tmp);
|
||||||
|
|
||||||
//std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl;
|
|
||||||
_Mat.Mooee(in,out);
|
_Mat.Mooee(in,out);
|
||||||
return axpy_norm(out,-1.0,tmp,out);
|
axpy(out,-1.0,tmp,out);
|
||||||
}
|
}
|
||||||
virtual RealD MpcDag (const Field &in, Field &out){
|
virtual void MpcDag (const Field &in, Field &out){
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MeooeDag(in,tmp);
|
_Mat.MeooeDag(in,tmp);
|
||||||
_Mat.MooeeInvDag(tmp,out);
|
_Mat.MooeeInvDag(tmp,out);
|
||||||
_Mat.MeooeDag(out,tmp);
|
_Mat.MeooeDag(out,tmp);
|
||||||
|
|
||||||
_Mat.MooeeDag(in,out);
|
_Mat.MooeeDag(in,out);
|
||||||
return axpy_norm(out,-1.0,tmp,out);
|
axpy(out,-1.0,tmp,out);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<class Matrix,class Field>
|
template<class Matrix,class Field>
|
||||||
@ -285,25 +283,23 @@ public:
|
|||||||
public:
|
public:
|
||||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||||
|
|
||||||
virtual RealD Mpc (const Field &in, Field &out) {
|
virtual void Mpc (const Field &in, Field &out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.Meooe(in,out);
|
_Mat.Meooe(in,out);
|
||||||
_Mat.MooeeInv(out,tmp);
|
_Mat.MooeeInv(out,tmp);
|
||||||
_Mat.Meooe(tmp,out);
|
_Mat.Meooe(tmp,out);
|
||||||
_Mat.MooeeInv(out,tmp);
|
_Mat.MooeeInv(out,tmp);
|
||||||
|
axpy(out,-1.0,tmp,in);
|
||||||
return axpy_norm(out,-1.0,tmp,in);
|
|
||||||
}
|
}
|
||||||
virtual RealD MpcDag (const Field &in, Field &out){
|
virtual void MpcDag (const Field &in, Field &out){
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MooeeInvDag(in,out);
|
_Mat.MooeeInvDag(in,out);
|
||||||
_Mat.MeooeDag(out,tmp);
|
_Mat.MeooeDag(out,tmp);
|
||||||
_Mat.MooeeInvDag(tmp,out);
|
_Mat.MooeeInvDag(tmp,out);
|
||||||
_Mat.MeooeDag(out,tmp);
|
_Mat.MeooeDag(out,tmp);
|
||||||
|
axpy(out,-1.0,tmp,in);
|
||||||
return axpy_norm(out,-1.0,tmp,in);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<class Matrix,class Field>
|
template<class Matrix,class Field>
|
||||||
@ -313,7 +309,7 @@ public:
|
|||||||
public:
|
public:
|
||||||
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||||
|
|
||||||
virtual RealD Mpc (const Field &in, Field &out) {
|
virtual void Mpc (const Field &in, Field &out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MooeeInv(in,out);
|
_Mat.MooeeInv(in,out);
|
||||||
@ -321,9 +317,9 @@ public:
|
|||||||
_Mat.MooeeInv(tmp,out);
|
_Mat.MooeeInv(tmp,out);
|
||||||
_Mat.Meooe(out,tmp);
|
_Mat.Meooe(out,tmp);
|
||||||
|
|
||||||
return axpy_norm(out,-1.0,tmp,in);
|
axpy(out,-1.0,tmp,in);
|
||||||
}
|
}
|
||||||
virtual RealD MpcDag (const Field &in, Field &out){
|
virtual void MpcDag (const Field &in, Field &out){
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MeooeDag(in,out);
|
_Mat.MeooeDag(in,out);
|
||||||
@ -331,7 +327,7 @@ public:
|
|||||||
_Mat.MeooeDag(tmp,out);
|
_Mat.MeooeDag(tmp,out);
|
||||||
_Mat.MooeeInvDag(out,tmp);
|
_Mat.MooeeInvDag(out,tmp);
|
||||||
|
|
||||||
return axpy_norm(out,-1.0,tmp,in);
|
axpy(out,-1.0,tmp,in);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -339,13 +335,13 @@ public:
|
|||||||
class NonHermitianSchurOperatorBase : public LinearOperatorBase<Field>
|
class NonHermitianSchurOperatorBase : public LinearOperatorBase<Field>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual RealD Mpc (const Field& in, Field& out) = 0;
|
virtual void Mpc (const Field& in, Field& out) = 0;
|
||||||
virtual RealD MpcDag (const Field& in, Field& out) = 0;
|
virtual void MpcDag (const Field& in, Field& out) = 0;
|
||||||
virtual void MpcDagMpc(const Field& in, Field& out, RealD& ni, RealD& no) {
|
virtual void MpcDagMpc(const Field& in, Field& out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
tmp.Checkerboard() = in.Checkerboard();
|
tmp.Checkerboard() = in.Checkerboard();
|
||||||
ni = Mpc(in,tmp);
|
Mpc(in,tmp);
|
||||||
no = MpcDag(tmp,out);
|
MpcDag(tmp,out);
|
||||||
}
|
}
|
||||||
virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) {
|
virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) {
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -366,6 +362,9 @@ public:
|
|||||||
void OpDir(const Field& in, Field& out, int dir, int disp) {
|
void OpDir(const Field& in, Field& out, int dir, int disp) {
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
void OpDirAll(const Field& in, std::vector<Field>& out){
|
||||||
|
assert(0);
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Matrix, class Field>
|
template<class Matrix, class Field>
|
||||||
@ -374,7 +373,7 @@ public:
|
|||||||
public:
|
public:
|
||||||
Matrix& _Mat;
|
Matrix& _Mat;
|
||||||
NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
|
NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
|
||||||
virtual RealD Mpc(const Field& in, Field& out) {
|
virtual void Mpc(const Field& in, Field& out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
tmp.Checkerboard() = !in.Checkerboard();
|
tmp.Checkerboard() = !in.Checkerboard();
|
||||||
|
|
||||||
@ -384,9 +383,9 @@ public:
|
|||||||
|
|
||||||
_Mat.Mooee(in, out);
|
_Mat.Mooee(in, out);
|
||||||
|
|
||||||
return axpy_norm(out, -1.0, tmp, out);
|
axpy(out, -1.0, tmp, out);
|
||||||
}
|
}
|
||||||
virtual RealD MpcDag(const Field& in, Field& out) {
|
virtual void MpcDag(const Field& in, Field& out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MeooeDag(in, tmp);
|
_Mat.MeooeDag(in, tmp);
|
||||||
@ -395,7 +394,7 @@ public:
|
|||||||
|
|
||||||
_Mat.MooeeDag(in, out);
|
_Mat.MooeeDag(in, out);
|
||||||
|
|
||||||
return axpy_norm(out, -1.0, tmp, out);
|
axpy(out, -1.0, tmp, out);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -407,7 +406,7 @@ public:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
|
NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
|
||||||
virtual RealD Mpc(const Field& in, Field& out) {
|
virtual void Mpc(const Field& in, Field& out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.Meooe(in, out);
|
_Mat.Meooe(in, out);
|
||||||
@ -415,9 +414,9 @@ public:
|
|||||||
_Mat.Meooe(tmp, out);
|
_Mat.Meooe(tmp, out);
|
||||||
_Mat.MooeeInv(out, tmp);
|
_Mat.MooeeInv(out, tmp);
|
||||||
|
|
||||||
return axpy_norm(out, -1.0, tmp, in);
|
axpy(out, -1.0, tmp, in);
|
||||||
}
|
}
|
||||||
virtual RealD MpcDag(const Field& in, Field& out) {
|
virtual void MpcDag(const Field& in, Field& out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MooeeInvDag(in, out);
|
_Mat.MooeeInvDag(in, out);
|
||||||
@ -425,7 +424,7 @@ public:
|
|||||||
_Mat.MooeeInvDag(tmp, out);
|
_Mat.MooeeInvDag(tmp, out);
|
||||||
_Mat.MeooeDag(out, tmp);
|
_Mat.MeooeDag(out, tmp);
|
||||||
|
|
||||||
return axpy_norm(out, -1.0, tmp, in);
|
axpy(out, -1.0, tmp, in);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -438,7 +437,7 @@ public:
|
|||||||
public:
|
public:
|
||||||
NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
|
NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
|
||||||
|
|
||||||
virtual RealD Mpc(const Field& in, Field& out) {
|
virtual void Mpc(const Field& in, Field& out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MooeeInv(in, out);
|
_Mat.MooeeInv(in, out);
|
||||||
@ -446,9 +445,9 @@ public:
|
|||||||
_Mat.MooeeInv(tmp, out);
|
_Mat.MooeeInv(tmp, out);
|
||||||
_Mat.Meooe(out, tmp);
|
_Mat.Meooe(out, tmp);
|
||||||
|
|
||||||
return axpy_norm(out, -1.0, tmp, in);
|
axpy(out, -1.0, tmp, in);
|
||||||
}
|
}
|
||||||
virtual RealD MpcDag(const Field& in, Field& out) {
|
virtual void MpcDag(const Field& in, Field& out) {
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
_Mat.MeooeDag(in, out);
|
_Mat.MeooeDag(in, out);
|
||||||
@ -456,7 +455,7 @@ public:
|
|||||||
_Mat.MeooeDag(tmp, out);
|
_Mat.MeooeDag(tmp, out);
|
||||||
_Mat.MooeeInvDag(out, tmp);
|
_Mat.MooeeInvDag(out, tmp);
|
||||||
|
|
||||||
return axpy_norm(out, -1.0, tmp, in);
|
axpy(out, -1.0, tmp, in);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -475,71 +474,38 @@ public:
|
|||||||
Matrix &_Mat;
|
Matrix &_Mat;
|
||||||
Field tmp;
|
Field tmp;
|
||||||
RealD mass;
|
RealD mass;
|
||||||
double tMpc;
|
|
||||||
double tIP;
|
|
||||||
double tMeo;
|
|
||||||
double taxpby_norm;
|
|
||||||
uint64_t ncall;
|
|
||||||
public:
|
public:
|
||||||
void Report(void)
|
|
||||||
{
|
|
||||||
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
|
|
||||||
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
|
|
||||||
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
|
|
||||||
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
|
|
||||||
}
|
|
||||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||||
{
|
{
|
||||||
assert( _Mat.isTrivialEE() );
|
assert( _Mat.isTrivialEE() );
|
||||||
mass = _Mat.Mass();
|
mass = _Mat.Mass();
|
||||||
tMpc=0;
|
|
||||||
tIP =0;
|
|
||||||
tMeo=0;
|
|
||||||
taxpby_norm=0;
|
|
||||||
ncall=0;
|
|
||||||
}
|
}
|
||||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
ncall++;
|
Mpc(in,out);
|
||||||
tMpc-=usecond();
|
|
||||||
n2 = Mpc(in,out);
|
|
||||||
tMpc+=usecond();
|
|
||||||
tIP-=usecond();
|
|
||||||
ComplexD dot= innerProduct(in,out);
|
ComplexD dot= innerProduct(in,out);
|
||||||
tIP+=usecond();
|
|
||||||
n1 = real(dot);
|
n1 = real(dot);
|
||||||
|
n2 =0.0;
|
||||||
}
|
}
|
||||||
virtual void HermOp(const Field &in, Field &out){
|
virtual void HermOp(const Field &in, Field &out){
|
||||||
ncall++;
|
Mpc(in,out);
|
||||||
tMpc-=usecond();
|
// _Mat.Meooe(in,out);
|
||||||
_Mat.Meooe(in,out);
|
// _Mat.Meooe(out,tmp);
|
||||||
_Mat.Meooe(out,tmp);
|
// axpby(out,-1.0,mass*mass,tmp,in);
|
||||||
tMpc+=usecond();
|
|
||||||
taxpby_norm-=usecond();
|
|
||||||
axpby(out,-1.0,mass*mass,tmp,in);
|
|
||||||
taxpby_norm+=usecond();
|
|
||||||
}
|
}
|
||||||
virtual RealD Mpc (const Field &in, Field &out)
|
virtual void Mpc (const Field &in, Field &out)
|
||||||
{
|
{
|
||||||
|
|
||||||
Field tmp(in.Grid());
|
Field tmp(in.Grid());
|
||||||
Field tmp2(in.Grid());
|
Field tmp2(in.Grid());
|
||||||
|
|
||||||
// std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
// _Mat.Mooee(in,out);
|
||||||
_Mat.Mooee(in,out);
|
// _Mat.Mooee(out,tmp);
|
||||||
_Mat.Mooee(out,tmp);
|
|
||||||
// std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
|
||||||
|
|
||||||
tMeo-=usecond();
|
|
||||||
_Mat.Meooe(in,out);
|
_Mat.Meooe(in,out);
|
||||||
_Mat.Meooe(out,tmp);
|
_Mat.Meooe(out,tmp);
|
||||||
tMeo+=usecond();
|
axpby(out,-1.0,mass*mass,tmp,in);
|
||||||
taxpby_norm-=usecond();
|
|
||||||
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
|
|
||||||
taxpby_norm+=usecond();
|
|
||||||
return nn;
|
|
||||||
}
|
}
|
||||||
virtual RealD MpcDag (const Field &in, Field &out){
|
virtual void MpcDag (const Field &in, Field &out){
|
||||||
return Mpc(in,out);
|
Mpc(in,out);
|
||||||
}
|
}
|
||||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||||
assert(0);// Never need with staggered
|
assert(0);// Never need with staggered
|
||||||
@ -547,7 +513,6 @@ public:
|
|||||||
};
|
};
|
||||||
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
|
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Base classes for functions of operators
|
// Base classes for functions of operators
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
|
@ -38,16 +38,12 @@ template<class Field> class SparseMatrixBase {
|
|||||||
public:
|
public:
|
||||||
virtual GridBase *Grid(void) =0;
|
virtual GridBase *Grid(void) =0;
|
||||||
// Full checkerboar operations
|
// Full checkerboar operations
|
||||||
virtual RealD M (const Field &in, Field &out)=0;
|
virtual void M (const Field &in, Field &out)=0;
|
||||||
virtual RealD Mdag (const Field &in, Field &out)=0;
|
virtual void Mdag (const Field &in, Field &out)=0;
|
||||||
virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) {
|
|
||||||
Field tmp (in.Grid());
|
|
||||||
ni=M(in,tmp);
|
|
||||||
no=Mdag(tmp,out);
|
|
||||||
}
|
|
||||||
virtual void MdagM(const Field &in, Field &out) {
|
virtual void MdagM(const Field &in, Field &out) {
|
||||||
RealD ni, no;
|
Field tmp (in.Grid());
|
||||||
MdagM(in,out,ni,no);
|
M(in,tmp);
|
||||||
|
Mdag(tmp,out);
|
||||||
}
|
}
|
||||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||||
|
@ -234,10 +234,8 @@ public:
|
|||||||
|
|
||||||
GridBase *grid=in.Grid();
|
GridBase *grid=in.Grid();
|
||||||
|
|
||||||
// std::cout << "Chevyshef(): in.Grid()="<<in.Grid()<<std::endl;
|
|
||||||
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
|
|
||||||
|
|
||||||
int vol=grid->gSites();
|
int vol=grid->gSites();
|
||||||
|
typedef typename Field::vector_type vector_type;
|
||||||
|
|
||||||
Field T0(grid); T0 = in;
|
Field T0(grid); T0 = in;
|
||||||
Field T1(grid);
|
Field T1(grid);
|
||||||
@ -260,12 +258,26 @@ public:
|
|||||||
for(int n=2;n<order;n++){
|
for(int n=2;n<order;n++){
|
||||||
|
|
||||||
Linop.HermOp(*Tn,y);
|
Linop.HermOp(*Tn,y);
|
||||||
// y=xscale*y+mscale*(*Tn);
|
#if 0
|
||||||
// *Tnp=2.0*y-(*Tnm);
|
auto y_v = y.View();
|
||||||
// out=out+Coeffs[n]* (*Tnp);
|
auto Tn_v = Tn->View();
|
||||||
|
auto Tnp_v = Tnp->View();
|
||||||
|
auto Tnm_v = Tnm->View();
|
||||||
|
constexpr int Nsimd = vector_type::Nsimd();
|
||||||
|
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
|
||||||
|
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
|
||||||
|
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
|
||||||
|
});
|
||||||
|
if ( Coeffs[n] != 0.0) {
|
||||||
|
axpy(out,Coeffs[n],*Tnp,out);
|
||||||
|
}
|
||||||
|
#else
|
||||||
axpby(y,xscale,mscale,y,(*Tn));
|
axpby(y,xscale,mscale,y,(*Tn));
|
||||||
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
|
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
|
||||||
|
if ( Coeffs[n] != 0.0) {
|
||||||
axpy(out,Coeffs[n],*Tnp,out);
|
axpy(out,Coeffs[n],*Tnp,out);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
// Cycle pointers to avoid copies
|
// Cycle pointers to avoid copies
|
||||||
Field *swizzle = Tnm;
|
Field *swizzle = Tnm;
|
||||||
Tnm =Tn;
|
Tnm =Tn;
|
||||||
|
@ -37,218 +37,6 @@ Author: Christoph Lehner <clehner@bnl.gov>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
// Move following 100 LOC to lattice/Lattice_basis.h
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
template<class Field>
|
|
||||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
|
||||||
{
|
|
||||||
// If assume basis[j] are already orthonormal,
|
|
||||||
// can take all inner products in parallel saving 2x bandwidth
|
|
||||||
// Save 3x bandwidth on the second line of loop.
|
|
||||||
// perhaps 2.5x speed up.
|
|
||||||
// 2x overall in Multigrid Lanczos
|
|
||||||
for(int j=0; j<k; ++j){
|
|
||||||
auto ip = innerProduct(basis[j],w);
|
|
||||||
w = w - ip*basis[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Field>
|
|
||||||
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
|
|
||||||
{
|
|
||||||
GridBase* grid = basis[0].Grid();
|
|
||||||
|
|
||||||
typedef typename Field::vector_object vobj;
|
|
||||||
typedef decltype(basis[0].View(CpuWrite)) View;
|
|
||||||
|
|
||||||
Vector<View> basis_v; basis_v.reserve(basis.size());
|
|
||||||
|
|
||||||
for(int k=0;k<basis.size();k++) basis_v.push_back(basis[k].View(CpuWrite));
|
|
||||||
|
|
||||||
View *basis_vp = &basis_v[0];
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private
|
|
||||||
thread_region
|
|
||||||
{
|
|
||||||
vobj* B = Bt.data() + Nm * thread_num();
|
|
||||||
|
|
||||||
thread_for_in_region(ss, grid->oSites(),{
|
|
||||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
|
||||||
|
|
||||||
for(int j=j0; j<j1; ++j){
|
|
||||||
for(int k=k0; k<k1; ++k){
|
|
||||||
B[j] +=Qt(j,k) * basis_v[k][ss];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(int j=j0; j<j1; ++j){
|
|
||||||
basis_v[j][ss] = B[j];
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
|
|
||||||
int nrot = j1-j0;
|
|
||||||
|
|
||||||
|
|
||||||
uint64_t oSites =grid->oSites();
|
|
||||||
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
|
|
||||||
|
|
||||||
// printf("BasisRotate %d %d nrot %d siteBlock %d\n",j0,j1,nrot,siteBlock);
|
|
||||||
|
|
||||||
Vector <vobj> Bt(siteBlock * nrot);
|
|
||||||
auto Bp=&Bt[0];
|
|
||||||
|
|
||||||
// GPU readable copy of Eigen matrix
|
|
||||||
Vector<double> Qt_jv(Nm*Nm);
|
|
||||||
double *Qt_p = & Qt_jv[0];
|
|
||||||
for(int k=0;k<Nm;++k){
|
|
||||||
for(int j=0;j<Nm;++j){
|
|
||||||
Qt_p[j*Nm+k]=Qt(j,k);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Block the loop to keep storage footprint down
|
|
||||||
for(uint64_t s=0;s<oSites;s+=siteBlock){
|
|
||||||
|
|
||||||
// remaining work in this block
|
|
||||||
int ssites=MIN(siteBlock,oSites-s);
|
|
||||||
|
|
||||||
// zero out the accumulators
|
|
||||||
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
|
|
||||||
auto z=coalescedRead(Bp[ss]);
|
|
||||||
z=Zero();
|
|
||||||
coalescedWrite(Bp[ss],z);
|
|
||||||
});
|
|
||||||
|
|
||||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
|
||||||
|
|
||||||
int j =sj%nrot;
|
|
||||||
int jj =j0+j;
|
|
||||||
int ss =sj/nrot;
|
|
||||||
int sss=ss+s;
|
|
||||||
|
|
||||||
for(int k=k0; k<k1; ++k){
|
|
||||||
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
|
||||||
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss]));
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
|
||||||
int j =sj%nrot;
|
|
||||||
int jj =j0+j;
|
|
||||||
int ss =sj/nrot;
|
|
||||||
int sss=ss+s;
|
|
||||||
coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
for(int k=0;k<basis.size();k++) basis_v[k].ViewClose();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract a single rotated vector
|
|
||||||
template<class Field>
|
|
||||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
|
||||||
{
|
|
||||||
GridBase* grid = basis[0].Grid();
|
|
||||||
typedef typename Field::vector_object vobj;
|
|
||||||
typedef decltype(basis[0].View(AcceleratorWrite)) View;
|
|
||||||
|
|
||||||
result.Checkerboard() = basis[0].Checkerboard();
|
|
||||||
|
|
||||||
autoView(result_v,result, AcceleratorWrite);
|
|
||||||
Vector<View> basis_v; basis_v.reserve(basis.size());
|
|
||||||
View * basis_vp = &basis_v[0];
|
|
||||||
|
|
||||||
for(int k=0;k<basis.size();k++) basis_v.push_back(basis[k].View(AcceleratorRead));
|
|
||||||
|
|
||||||
Vector<double> Qt_jv(Nm); double * Qt_j = & Qt_jv[0];
|
|
||||||
|
|
||||||
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
|
|
||||||
|
|
||||||
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
|
||||||
auto B=coalescedRead(basis_vp[k0][ss]);
|
|
||||||
B=Zero();
|
|
||||||
for(int k=k0; k<k1; ++k){
|
|
||||||
B +=Qt_j[k] * coalescedRead(basis_vp[k][ss]);
|
|
||||||
}
|
|
||||||
coalescedWrite(result_v[ss], B);
|
|
||||||
});
|
|
||||||
for(int k=0;k<basis.size();k++) basis_v[k].ViewClose();
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Field>
|
|
||||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
|
||||||
{
|
|
||||||
int vlen = idx.size();
|
|
||||||
|
|
||||||
assert(vlen>=1);
|
|
||||||
assert(vlen<=sort_vals.size());
|
|
||||||
assert(vlen<=_v.size());
|
|
||||||
|
|
||||||
for (size_t i=0;i<vlen;i++) {
|
|
||||||
|
|
||||||
if (idx[i] != i) {
|
|
||||||
|
|
||||||
//////////////////////////////////////
|
|
||||||
// idx[i] is a table of desired sources giving a permutation.
|
|
||||||
// Swap v[i] with v[idx[i]].
|
|
||||||
// Find j>i for which _vnew[j] = _vold[i],
|
|
||||||
// track the move idx[j] => idx[i]
|
|
||||||
// track the move idx[i] => i
|
|
||||||
//////////////////////////////////////
|
|
||||||
size_t j;
|
|
||||||
for (j=i;j<idx.size();j++)
|
|
||||||
if (idx[j]==i)
|
|
||||||
break;
|
|
||||||
|
|
||||||
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
|
||||||
|
|
||||||
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
|
|
||||||
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
|
||||||
|
|
||||||
idx[j] = idx[i];
|
|
||||||
idx[i] = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
|
||||||
{
|
|
||||||
std::vector<int> idx(sort_vals.size());
|
|
||||||
std::iota(idx.begin(), idx.end(), 0);
|
|
||||||
|
|
||||||
// sort indexes based on comparing values in v
|
|
||||||
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
|
||||||
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
|
||||||
});
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Field>
|
|
||||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
|
||||||
{
|
|
||||||
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
|
||||||
if (reverse)
|
|
||||||
std::reverse(idx.begin(), idx.end());
|
|
||||||
|
|
||||||
basisReorderInPlace(_v,sort_vals,idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
// PAB: faster to compute the inner products first then fuse loops.
|
|
||||||
// If performance critical can improve.
|
|
||||||
template<class Field>
|
|
||||||
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
|
||||||
result = Zero();
|
|
||||||
assert(_v.size()==eval.size());
|
|
||||||
int N = (int)_v.size();
|
|
||||||
for (int i=0;i<N;i++) {
|
|
||||||
Field& tmp = _v[i];
|
|
||||||
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Implicitly restarted lanczos
|
// Implicitly restarted lanczos
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
|
@ -6,72 +6,6 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
MemoryStats *MemoryProfiler::stats = nullptr;
|
MemoryStats *MemoryProfiler::stats = nullptr;
|
||||||
bool MemoryProfiler::debug = false;
|
bool MemoryProfiler::debug = false;
|
||||||
|
|
||||||
#ifdef GRID_CUDA
|
|
||||||
#define SMALL_LIMIT (0)
|
|
||||||
#else
|
|
||||||
#define SMALL_LIMIT (4096)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef ALLOCATION_CACHE
|
|
||||||
int PointerCache::victim;
|
|
||||||
|
|
||||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
|
||||||
|
|
||||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
|
||||||
|
|
||||||
if (bytes < SMALL_LIMIT ) return ptr;
|
|
||||||
|
|
||||||
#ifdef GRID_OMP
|
|
||||||
assert(omp_in_parallel()==0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void * ret = NULL;
|
|
||||||
int v = -1;
|
|
||||||
|
|
||||||
for(int e=0;e<Ncache;e++) {
|
|
||||||
if ( Entries[e].valid==0 ) {
|
|
||||||
v=e;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( v==-1 ) {
|
|
||||||
v=victim;
|
|
||||||
victim = (victim+1)%Ncache;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( Entries[v].valid ) {
|
|
||||||
ret = Entries[v].address;
|
|
||||||
Entries[v].valid = 0;
|
|
||||||
Entries[v].address = NULL;
|
|
||||||
Entries[v].bytes = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
Entries[v].address=ptr;
|
|
||||||
Entries[v].bytes =bytes;
|
|
||||||
Entries[v].valid =1;
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void *PointerCache::Lookup(size_t bytes) {
|
|
||||||
|
|
||||||
if (bytes < SMALL_LIMIT ) return NULL;
|
|
||||||
|
|
||||||
#ifdef GRID_OMP
|
|
||||||
assert(omp_in_parallel()==0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for(int e=0;e<Ncache;e++){
|
|
||||||
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
|
|
||||||
Entries[e].valid = 0;
|
|
||||||
return Entries[e].address;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void check_huge_pages(void *Buf,uint64_t BYTES)
|
void check_huge_pages(void *Buf,uint64_t BYTES)
|
||||||
{
|
{
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
|
@ -114,6 +114,7 @@ public:
|
|||||||
void GlobalSumVector(RealD *,int N);
|
void GlobalSumVector(RealD *,int N);
|
||||||
void GlobalSum(uint32_t &);
|
void GlobalSum(uint32_t &);
|
||||||
void GlobalSum(uint64_t &);
|
void GlobalSum(uint64_t &);
|
||||||
|
void GlobalSumVector(uint64_t*,int N);
|
||||||
void GlobalSum(ComplexF &c);
|
void GlobalSum(ComplexF &c);
|
||||||
void GlobalSumVector(ComplexF *c,int N);
|
void GlobalSumVector(ComplexF *c,int N);
|
||||||
void GlobalSum(ComplexD &c);
|
void GlobalSum(ComplexD &c);
|
||||||
|
@ -255,6 +255,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
|
|||||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
void CartesianCommunicator::GlobalXOR(uint32_t &u){
|
void CartesianCommunicator::GlobalXOR(uint32_t &u){
|
||||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
|
@ -70,9 +70,10 @@ CartesianCommunicator::~CartesianCommunicator(){}
|
|||||||
void CartesianCommunicator::GlobalSum(float &){}
|
void CartesianCommunicator::GlobalSum(float &){}
|
||||||
void CartesianCommunicator::GlobalSumVector(float *,int N){}
|
void CartesianCommunicator::GlobalSumVector(float *,int N){}
|
||||||
void CartesianCommunicator::GlobalSum(double &){}
|
void CartesianCommunicator::GlobalSum(double &){}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
||||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
||||||
void CartesianCommunicator::GlobalSum(uint64_t &){}
|
void CartesianCommunicator::GlobalSum(uint64_t &){}
|
||||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
void CartesianCommunicator::GlobalSumVector(uint64_t *,int N){}
|
||||||
void CartesianCommunicator::GlobalXOR(uint32_t &){}
|
void CartesianCommunicator::GlobalXOR(uint32_t &){}
|
||||||
void CartesianCommunicator::GlobalXOR(uint64_t &){}
|
void CartesianCommunicator::GlobalXOR(uint64_t &){}
|
||||||
|
|
||||||
|
@ -74,7 +74,9 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
|||||||
if (heap_bytes >= heap_size) {
|
if (heap_bytes >= heap_size) {
|
||||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||||
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
std::cout<< " Current alloc is " << (bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||||
|
std::cout<< " Current bytes is " << (heap_bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||||
|
std::cout<< " Current heap is " << (heap_size/(1024*1024)) <<"MB"<<std::endl;
|
||||||
assert(heap_bytes<heap_size);
|
assert(heap_bytes<heap_size);
|
||||||
}
|
}
|
||||||
//std::cerr << "ShmBufferMalloc "<<std::hex<< ptr<<" - "<<((uint64_t)ptr+bytes)<<std::dec<<std::endl;
|
//std::cerr << "ShmBufferMalloc "<<std::hex<< ptr<<" - "<<((uint64_t)ptr+bytes)<<std::dec<<std::endl;
|
||||||
|
@ -49,4 +49,29 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
|
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
template<typename Op, typename T1>
|
||||||
|
auto Cshift(const LatticeUnaryExpression<Op,T1> &expr,int dim,int shift)
|
||||||
|
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||||
|
{
|
||||||
|
return Cshift(closure(expr),dim,shift);
|
||||||
|
}
|
||||||
|
template <class Op, class T1, class T2>
|
||||||
|
auto Cshift(const LatticeBinaryExpression<Op,T1,T2> &expr,int dim,int shift)
|
||||||
|
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
|
||||||
|
{
|
||||||
|
return Cshift(closure(expr),dim,shift);
|
||||||
|
}
|
||||||
|
template <class Op, class T1, class T2, class T3>
|
||||||
|
auto Cshift(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr,int dim,int shift)
|
||||||
|
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
||||||
|
eval(0, expr.arg2),
|
||||||
|
eval(0, expr.arg3)))>
|
||||||
|
{
|
||||||
|
return Cshift(closure(expr),dim,shift);
|
||||||
|
}
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid/lattice/Lattice_local.h>
|
#include <Grid/lattice/Lattice_local.h>
|
||||||
#include <Grid/lattice/Lattice_reduction.h>
|
#include <Grid/lattice/Lattice_reduction.h>
|
||||||
#include <Grid/lattice/Lattice_peekpoke.h>
|
#include <Grid/lattice/Lattice_peekpoke.h>
|
||||||
#include <Grid/lattice/Lattice_reality.h>
|
//#include <Grid/lattice/Lattice_reality.h>
|
||||||
#include <Grid/lattice/Lattice_comparison_utils.h>
|
#include <Grid/lattice/Lattice_comparison_utils.h>
|
||||||
#include <Grid/lattice/Lattice_comparison.h>
|
#include <Grid/lattice/Lattice_comparison.h>
|
||||||
#include <Grid/lattice/Lattice_coordinate.h>
|
#include <Grid/lattice/Lattice_coordinate.h>
|
||||||
@ -44,4 +44,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid/lattice/Lattice_rng.h>
|
#include <Grid/lattice/Lattice_rng.h>
|
||||||
#include <Grid/lattice/Lattice_unary.h>
|
#include <Grid/lattice/Lattice_unary.h>
|
||||||
#include <Grid/lattice/Lattice_transfer.h>
|
#include <Grid/lattice/Lattice_transfer.h>
|
||||||
|
#include <Grid/lattice/Lattice_basis.h>
|
||||||
|
@ -9,6 +9,7 @@ Copyright (C) 2015
|
|||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -99,7 +100,7 @@ const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg)
|
|||||||
template <class lobj> accelerator_inline
|
template <class lobj> accelerator_inline
|
||||||
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
|
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
|
||||||
{
|
{
|
||||||
auto view = arg.View();
|
auto view = arg.View(AcceleratorRead);
|
||||||
return view[ss];
|
return view[ss];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -9,6 +9,7 @@ Copyright (C) 2015
|
|||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -91,6 +92,7 @@ public:
|
|||||||
// The view is trivially copy constructible and may be copied to an accelerator device
|
// The view is trivially copy constructible and may be copied to an accelerator device
|
||||||
// in device lambdas
|
// in device lambdas
|
||||||
/////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
LatticeView<vobj> View (ViewMode mode) const
|
LatticeView<vobj> View (ViewMode mode) const
|
||||||
{
|
{
|
||||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
||||||
|
226
Grid/lattice/Lattice_basis.h
Normal file
226
Grid/lattice/Lattice_basis.h
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/lattice/Lattice_basis.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||||
|
{
|
||||||
|
// If assume basis[j] are already orthonormal,
|
||||||
|
// can take all inner products in parallel saving 2x bandwidth
|
||||||
|
// Save 3x bandwidth on the second line of loop.
|
||||||
|
// perhaps 2.5x speed up.
|
||||||
|
// 2x overall in Multigrid Lanczos
|
||||||
|
for(int j=0; j<k; ++j){
|
||||||
|
auto ip = innerProduct(basis[j],w);
|
||||||
|
w = w - ip*basis[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class VField, class Matrix>
|
||||||
|
void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||||
|
{
|
||||||
|
typedef decltype(basis[0]) Field;
|
||||||
|
typedef decltype(basis[0].View(AcceleratorRead)) View;
|
||||||
|
|
||||||
|
Vector<View> basis_v; basis_v.reserve(basis.size());
|
||||||
|
GridBase* grid = basis[0].Grid();
|
||||||
|
|
||||||
|
for(int k=0;k<basis.size();k++){
|
||||||
|
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
View *basis_vp = &basis_v[0];
|
||||||
|
|
||||||
|
int nrot = j1-j0;
|
||||||
|
if (!nrot) // edge case not handled gracefully by Cuda
|
||||||
|
return;
|
||||||
|
|
||||||
|
uint64_t oSites =grid->oSites();
|
||||||
|
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
|
||||||
|
|
||||||
|
typedef typename std::remove_reference<decltype(basis_v[0][0])>::type vobj;
|
||||||
|
|
||||||
|
Vector <vobj> Bt(siteBlock * nrot);
|
||||||
|
auto Bp=&Bt[0];
|
||||||
|
|
||||||
|
// GPU readable copy of matrix
|
||||||
|
Vector<double> Qt_jv(Nm*Nm);
|
||||||
|
double *Qt_p = & Qt_jv[0];
|
||||||
|
thread_for(i,Nm*Nm,{
|
||||||
|
int j = i/Nm;
|
||||||
|
int k = i%Nm;
|
||||||
|
Qt_p[i]=Qt(j,k);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Block the loop to keep storage footprint down
|
||||||
|
for(uint64_t s=0;s<oSites;s+=siteBlock){
|
||||||
|
|
||||||
|
// remaining work in this block
|
||||||
|
int ssites=MIN(siteBlock,oSites-s);
|
||||||
|
|
||||||
|
// zero out the accumulators
|
||||||
|
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
|
||||||
|
decltype(coalescedRead(Bp[ss])) z;
|
||||||
|
z=Zero();
|
||||||
|
coalescedWrite(Bp[ss],z);
|
||||||
|
});
|
||||||
|
|
||||||
|
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||||
|
|
||||||
|
int j =sj%nrot;
|
||||||
|
int jj =j0+j;
|
||||||
|
int ss =sj/nrot;
|
||||||
|
int sss=ss+s;
|
||||||
|
|
||||||
|
for(int k=k0; k<k1; ++k){
|
||||||
|
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
||||||
|
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||||
|
int j =sj%nrot;
|
||||||
|
int jj =j0+j;
|
||||||
|
int ss =sj/nrot;
|
||||||
|
int sss=ss+s;
|
||||||
|
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int k=0;k<basis.size();k++) basis_v[k].ViewClose();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract a single rotated vector
|
||||||
|
template<class Field>
|
||||||
|
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||||
|
{
|
||||||
|
typedef decltype(basis[0].View(AcceleratorRead)) View;
|
||||||
|
typedef typename Field::vector_object vobj;
|
||||||
|
GridBase* grid = basis[0].Grid();
|
||||||
|
|
||||||
|
result.Checkerboard() = basis[0].Checkerboard();
|
||||||
|
|
||||||
|
Vector<View> basis_v; basis_v.reserve(basis.size());
|
||||||
|
for(int k=0;k<basis.size();k++){
|
||||||
|
basis_v.push_back(basis[k].View(AcceleratorRead));
|
||||||
|
}
|
||||||
|
vobj zz=Zero();
|
||||||
|
Vector<double> Qt_jv(Nm);
|
||||||
|
double * Qt_j = & Qt_jv[0];
|
||||||
|
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
|
||||||
|
|
||||||
|
autoView(result_v,result,AcceleratorWrite);
|
||||||
|
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
||||||
|
auto B=coalescedRead(zz);
|
||||||
|
for(int k=k0; k<k1; ++k){
|
||||||
|
B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
|
||||||
|
}
|
||||||
|
coalescedWrite(result_v[ss], B);
|
||||||
|
});
|
||||||
|
for(int k=0;k<basis.size();k++) basis_v[k].ViewClose();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
||||||
|
{
|
||||||
|
int vlen = idx.size();
|
||||||
|
|
||||||
|
assert(vlen>=1);
|
||||||
|
assert(vlen<=sort_vals.size());
|
||||||
|
assert(vlen<=_v.size());
|
||||||
|
|
||||||
|
for (size_t i=0;i<vlen;i++) {
|
||||||
|
|
||||||
|
if (idx[i] != i) {
|
||||||
|
|
||||||
|
//////////////////////////////////////
|
||||||
|
// idx[i] is a table of desired sources giving a permutation.
|
||||||
|
// Swap v[i] with v[idx[i]].
|
||||||
|
// Find j>i for which _vnew[j] = _vold[i],
|
||||||
|
// track the move idx[j] => idx[i]
|
||||||
|
// track the move idx[i] => i
|
||||||
|
//////////////////////////////////////
|
||||||
|
size_t j;
|
||||||
|
for (j=i;j<idx.size();j++)
|
||||||
|
if (idx[j]==i)
|
||||||
|
break;
|
||||||
|
|
||||||
|
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
||||||
|
|
||||||
|
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
|
||||||
|
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
||||||
|
|
||||||
|
idx[j] = idx[i];
|
||||||
|
idx[i] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
||||||
|
{
|
||||||
|
std::vector<int> idx(sort_vals.size());
|
||||||
|
std::iota(idx.begin(), idx.end(), 0);
|
||||||
|
|
||||||
|
// sort indexes based on comparing values in v
|
||||||
|
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
||||||
|
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
||||||
|
});
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
||||||
|
{
|
||||||
|
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
||||||
|
if (reverse)
|
||||||
|
std::reverse(idx.begin(), idx.end());
|
||||||
|
|
||||||
|
basisReorderInPlace(_v,sort_vals,idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
// PAB: faster to compute the inner products first then fuse loops.
|
||||||
|
// If performance critical can improve.
|
||||||
|
template<class Field>
|
||||||
|
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
||||||
|
result = Zero();
|
||||||
|
assert(_v.size()==eval.size());
|
||||||
|
int N = (int)_v.size();
|
||||||
|
for (int i=0;i<N;i++) {
|
||||||
|
Field& tmp = _v[i];
|
||||||
|
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
@ -40,8 +40,11 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||||
Lattice<vobj> ret(lhs.Grid());
|
Lattice<vobj> ret(lhs.Grid());
|
||||||
|
|
||||||
autoView( lhs_v, lhs, AcceleratorRead);
|
autoView( lhs_v, lhs, AcceleratorRead);
|
||||||
autoView( ret_v, ret, AcceleratorWrite);
|
autoView( ret_v, ret, AcceleratorWrite);
|
||||||
|
|
||||||
|
ret.Checkerboard()=lhs.Checkerboard();
|
||||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||||
coalescedWrite(ret_v[ss], adj(lhs_v(ss)));
|
coalescedWrite(ret_v[ss], adj(lhs_v(ss)));
|
||||||
});
|
});
|
||||||
@ -50,8 +53,11 @@ template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
|||||||
|
|
||||||
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||||
Lattice<vobj> ret(lhs.Grid());
|
Lattice<vobj> ret(lhs.Grid());
|
||||||
|
|
||||||
autoView( lhs_v, lhs, AcceleratorRead);
|
autoView( lhs_v, lhs, AcceleratorRead);
|
||||||
autoView( ret_v, ret, AcceleratorWrite);
|
autoView( ret_v, ret, AcceleratorWrite);
|
||||||
|
|
||||||
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||||
coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss)));
|
coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss)));
|
||||||
});
|
});
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -64,6 +65,37 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites)
|
|||||||
|
|
||||||
return ssum;
|
return ssum;
|
||||||
}
|
}
|
||||||
|
template<class vobj>
|
||||||
|
inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_objectD sobj;
|
||||||
|
|
||||||
|
const int nthread = GridThread::GetThreads();
|
||||||
|
|
||||||
|
Vector<sobj> sumarray(nthread);
|
||||||
|
for(int i=0;i<nthread;i++){
|
||||||
|
sumarray[i]=Zero();
|
||||||
|
}
|
||||||
|
|
||||||
|
thread_for(thr,nthread, {
|
||||||
|
int nwork, mywork, myoff;
|
||||||
|
nwork = osites;
|
||||||
|
GridThread::GetWork(nwork,thr,mywork,myoff);
|
||||||
|
vobj vvsum=Zero();
|
||||||
|
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||||
|
vvsum = vvsum + arg[ss];
|
||||||
|
}
|
||||||
|
sumarray[thr]=Reduce(vvsum);
|
||||||
|
});
|
||||||
|
|
||||||
|
sobj ssum=Zero(); // sum across threads
|
||||||
|
for(int i=0;i<nthread;i++){
|
||||||
|
ssum = ssum+sumarray[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return ssum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
|
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
|
||||||
@ -74,6 +106,15 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
|
|||||||
return sum_cpu(arg,osites);
|
return sum_cpu(arg,osites);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
template<class vobj>
|
||||||
|
inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
|
||||||
|
{
|
||||||
|
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
||||||
|
return sumD_gpu(arg,osites);
|
||||||
|
#else
|
||||||
|
return sumD_cpu(arg,osites);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||||
@ -101,7 +142,7 @@ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
|||||||
|
|
||||||
// Double inner product
|
// Double inner product
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||||
{
|
{
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_typeD vector_type;
|
typedef typename vobj::vector_typeD vector_type;
|
||||||
@ -113,32 +154,37 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
|||||||
const uint64_t sites = grid->oSites();
|
const uint64_t sites = grid->oSites();
|
||||||
|
|
||||||
// Might make all code paths go this way.
|
// Might make all code paths go this way.
|
||||||
|
typedef decltype(innerProduct(vobj(),vobj())) inner_t;
|
||||||
|
Vector<inner_t> inner_tmp(sites);
|
||||||
|
auto inner_tmp_v = &inner_tmp[0];
|
||||||
|
|
||||||
|
{
|
||||||
autoView( left_v , left, AcceleratorRead);
|
autoView( left_v , left, AcceleratorRead);
|
||||||
autoView( right_v,right, AcceleratorRead);
|
autoView( right_v,right, AcceleratorRead);
|
||||||
|
|
||||||
// GPU - SIMT lane compliance...
|
// GPU - SIMT lane compliance...
|
||||||
typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t;
|
|
||||||
Vector<inner_t> inner_tmp(sites);
|
|
||||||
auto inner_tmp_v = &inner_tmp[0];
|
|
||||||
|
|
||||||
accelerator_for( ss, sites, nsimd,{
|
accelerator_for( ss, sites, nsimd,{
|
||||||
auto x_l = left_v(ss);
|
auto x_l = left_v(ss);
|
||||||
auto y_l = right_v(ss);
|
auto y_l = right_v(ss);
|
||||||
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
||||||
})
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// This is in single precision and fails some tests
|
// This is in single precision and fails some tests
|
||||||
// Need a sumD that sums in double
|
// Need a sumD that sums in double
|
||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
nrm = TensorRemove(sumD(inner_tmp_v,sites));
|
||||||
nrm = TensorRemove(sumD_gpu(inner_tmp_v,sites));
|
|
||||||
#else
|
|
||||||
nrm = TensorRemove(sum_cpu(inner_tmp_v,sites));
|
|
||||||
#endif
|
|
||||||
grid->GlobalSum(nrm);
|
|
||||||
|
|
||||||
return nrm;
|
return nrm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) {
|
||||||
|
GridBase *grid = left.Grid();
|
||||||
|
ComplexD nrm = rankInnerProduct(left,right);
|
||||||
|
grid->GlobalSum(nrm);
|
||||||
|
return nrm;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
// Fast axpby_norm
|
// Fast axpby_norm
|
||||||
// z = a x + b y
|
// z = a x + b y
|
||||||
@ -181,16 +227,50 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
|||||||
coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp));
|
coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp));
|
||||||
coalescedWrite(z_v[ss],tmp);
|
coalescedWrite(z_v[ss],tmp);
|
||||||
});
|
});
|
||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
nrm = real(TensorRemove(sumD(inner_tmp_v,sites)));
|
||||||
nrm = real(TensorRemove(sumD_gpu(inner_tmp_v,sites)));
|
|
||||||
#else
|
|
||||||
// Already promoted to double
|
|
||||||
nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
|
|
||||||
#endif
|
|
||||||
grid->GlobalSum(nrm);
|
grid->GlobalSum(nrm);
|
||||||
return nrm;
|
return nrm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class vobj> strong_inline void
|
||||||
|
innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||||
|
{
|
||||||
|
conformable(left,right);
|
||||||
|
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_typeD vector_type;
|
||||||
|
Vector<ComplexD> tmp(2);
|
||||||
|
|
||||||
|
GridBase *grid = left.Grid();
|
||||||
|
|
||||||
|
|
||||||
|
const uint64_t nsimd = grid->Nsimd();
|
||||||
|
const uint64_t sites = grid->oSites();
|
||||||
|
|
||||||
|
// GPU
|
||||||
|
typedef decltype(innerProduct(vobj(),vobj())) inner_t;
|
||||||
|
typedef decltype(innerProduct(vobj(),vobj())) norm_t;
|
||||||
|
Vector<inner_t> inner_tmp(sites);
|
||||||
|
Vector<norm_t> norm_tmp(sites);
|
||||||
|
auto inner_tmp_v = &inner_tmp[0];
|
||||||
|
auto norm_tmp_v = &norm_tmp[0];
|
||||||
|
{
|
||||||
|
autoView(left_v,left, AcceleratorRead);
|
||||||
|
autoView(right_v,right,AcceleratorRead);
|
||||||
|
accelerator_for( ss, sites, nsimd,{
|
||||||
|
auto left_tmp = left_v(ss);
|
||||||
|
coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss)));
|
||||||
|
coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp[0] = TensorRemove(sumD(inner_tmp_v,sites));
|
||||||
|
tmp[1] = TensorRemove(sumD(norm_tmp_v,sites));
|
||||||
|
|
||||||
|
grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector
|
||||||
|
ip = tmp[0];
|
||||||
|
nrm = real(tmp[1]);
|
||||||
|
}
|
||||||
|
|
||||||
template<class Op,class T1>
|
template<class Op,class T1>
|
||||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||||
|
@ -37,6 +37,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Trace
|
// Trace
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/*
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
|
inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
|
||||||
{
|
{
|
||||||
@ -48,6 +49,7 @@ inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
|
|||||||
});
|
});
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Trace Index level dependent operation
|
// Trace Index level dependent operation
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -84,30 +85,136 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj,class CComplex,int nbasis>
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Flexible Type Conversion for internal promotion to double as well as graceful
|
||||||
|
// treatment of scalar-compatible types
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
accelerator_inline void convertType(ComplexD & out, const std::complex<double> & in) {
|
||||||
|
out = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
accelerator_inline void convertType(ComplexF & out, const std::complex<float> & in) {
|
||||||
|
out = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
accelerator_inline void convertType(vComplexF & out, const ComplexF & in) {
|
||||||
|
((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in;
|
||||||
|
}
|
||||||
|
accelerator_inline void convertType(vComplexD & out, const ComplexD & in) {
|
||||||
|
((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd())] = in;
|
||||||
|
}
|
||||||
|
accelerator_inline void convertType(vComplexD2 & out, const ComplexD & in) {
|
||||||
|
((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd()*2)] = in;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
accelerator_inline void convertType(vComplexF & out, const vComplexD2 & in) {
|
||||||
|
out.v = Optimization::PrecisionChange::DtoS(in._internal[0].v,in._internal[1].v);
|
||||||
|
}
|
||||||
|
|
||||||
|
accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) {
|
||||||
|
Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T1,typename T2,int N>
|
||||||
|
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in);
|
||||||
|
template<typename T1,typename T2,int N>
|
||||||
|
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in);
|
||||||
|
|
||||||
|
template<typename T1,typename T2, typename std::enable_if<!isGridScalar<T1>::value, T1>::type* = nullptr>
|
||||||
|
accelerator_inline void convertType(T1 & out, const iScalar<T2> & in) {
|
||||||
|
convertType(out,in._internal);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T1,typename T2>
|
||||||
|
accelerator_inline void convertType(iScalar<T1> & out, const T2 & in) {
|
||||||
|
convertType(out._internal,in);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T1,typename T2,int N>
|
||||||
|
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in) {
|
||||||
|
for (int i=0;i<N;i++)
|
||||||
|
for (int j=0;j<N;j++)
|
||||||
|
convertType(out._internal[i][j],in._internal[i][j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T1,typename T2,int N>
|
||||||
|
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in) {
|
||||||
|
for (int i=0;i<N;i++)
|
||||||
|
convertType(out._internal[i],in._internal[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename std::enable_if<isGridFundamental<T>::value, T>::type* = nullptr>
|
||||||
|
accelerator_inline void convertType(T & out, const T & in) {
|
||||||
|
out = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T1,typename T2>
|
||||||
|
accelerator_inline void convertType(Lattice<T1> & out, const Lattice<T2> & in) {
|
||||||
|
autoView( out_v , out,AcceleratorWrite);
|
||||||
|
autoView( in_v , in ,AcceleratorRead);
|
||||||
|
accelerator_for(ss,out_v.size(),T1::Nsimd(),{
|
||||||
|
convertType(out_v[ss],in_v(ss));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// precision-promoted local inner product
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<class vobj>
|
||||||
|
inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
|
||||||
|
-> Lattice<iScalar<decltype(TensorRemove(innerProductD2(lhs.View()[0],rhs.View()[0])))>>
|
||||||
|
{
|
||||||
|
autoView( lhs_v , lhs, AcceleratorRead);
|
||||||
|
autoView( rhs_v , rhs, AcceleratorRead);
|
||||||
|
|
||||||
|
typedef decltype(TensorRemove(innerProductD2(lhs_v[0],rhs_v[0]))) t_inner;
|
||||||
|
Lattice<iScalar<t_inner>> ret(lhs.Grid());
|
||||||
|
|
||||||
|
{
|
||||||
|
autoView(ret_v, ret,AcceleratorWrite);
|
||||||
|
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
|
||||||
|
convertType(ret_v[ss],innerProductD2(lhs_v(ss),rhs_v(ss)));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// block routines
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
const Lattice<vobj> &fineData,
|
const Lattice<vobj> &fineData,
|
||||||
const std::vector<Lattice<vobj> > &Basis)
|
const VLattice &Basis)
|
||||||
{
|
{
|
||||||
GridBase * fine = fineData.Grid();
|
GridBase * fine = fineData.Grid();
|
||||||
GridBase * coarse= coarseData.Grid();
|
GridBase * coarse= coarseData.Grid();
|
||||||
|
|
||||||
Lattice<CComplex> ip(coarse);
|
Lattice<iScalar<CComplex>> ip(coarse);
|
||||||
|
Lattice<vobj> fineDataRed = fineData;
|
||||||
|
|
||||||
autoView( coarseData_ , coarseData, AcceleratorWrite);
|
autoView( coarseData_ , coarseData, AcceleratorWrite);
|
||||||
autoView( ip_ , ip, AcceleratorWrite);
|
autoView( ip_ , ip, AcceleratorWrite);
|
||||||
for(int v=0;v<nbasis;v++) {
|
for(int v=0;v<nbasis;v++) {
|
||||||
blockInnerProduct(ip,Basis[v],fineData);
|
blockInnerProductD(ip,Basis[v],fineDataRed); // ip = <basis|fine>
|
||||||
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
|
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
|
||||||
coalescedWrite(coarseData_[sc](v),ip_(sc));
|
convertType(coarseData_[sc](v),ip_[sc]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// improve numerical stability of projection
|
||||||
|
// |fine> = |fine> - <basis|fine> |basis>
|
||||||
|
ip=-ip;
|
||||||
|
blockZAXPY(fineDataRed,ip,Basis[v],fineDataRed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj,class CComplex>
|
|
||||||
|
template<class vobj,class vobj2,class CComplex>
|
||||||
inline void blockZAXPY(Lattice<vobj> &fineZ,
|
inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||||
const Lattice<CComplex> &coarseA,
|
const Lattice<CComplex> &coarseA,
|
||||||
const Lattice<vobj> &fineX,
|
const Lattice<vobj2> &fineX,
|
||||||
const Lattice<vobj> &fineY)
|
const Lattice<vobj> &fineY)
|
||||||
{
|
{
|
||||||
GridBase * fine = fineZ.Grid();
|
GridBase * fine = fineZ.Grid();
|
||||||
@ -145,13 +252,50 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||||
|
|
||||||
// z = A x + y
|
// z = A x + y
|
||||||
coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
|
#ifdef GRID_SIMT
|
||||||
|
typename vobj2::tensor_reduced::scalar_object cA;
|
||||||
|
typename vobj::scalar_object cAx;
|
||||||
|
#else
|
||||||
|
typename vobj2::tensor_reduced cA;
|
||||||
|
vobj cAx;
|
||||||
|
#endif
|
||||||
|
convertType(cA,TensorRemove(coarseA_(sc)));
|
||||||
|
auto prod = cA*fineX_(sf);
|
||||||
|
convertType(cAx,prod);
|
||||||
|
coalescedWrite(fineZ_[sf],cAx+fineY_(sf));
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj,class CComplex>
|
template<class vobj,class CComplex>
|
||||||
|
inline void blockInnerProductD(Lattice<CComplex> &CoarseInner,
|
||||||
|
const Lattice<vobj> &fineX,
|
||||||
|
const Lattice<vobj> &fineY)
|
||||||
|
{
|
||||||
|
typedef iScalar<decltype(TensorRemove(innerProductD2(vobj(),vobj())))> dotp;
|
||||||
|
|
||||||
|
GridBase *coarse(CoarseInner.Grid());
|
||||||
|
GridBase *fine (fineX.Grid());
|
||||||
|
|
||||||
|
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
|
||||||
|
Lattice<dotp> coarse_inner(coarse);
|
||||||
|
|
||||||
|
// Precision promotion
|
||||||
|
fine_inner = localInnerProductD(fineX,fineY);
|
||||||
|
blockSum(coarse_inner,fine_inner);
|
||||||
|
{
|
||||||
|
autoView( CoarseInner_ , CoarseInner,AcceleratorWrite);
|
||||||
|
autoView( coarse_inner_ , coarse_inner,AcceleratorRead);
|
||||||
|
accelerator_for(ss, coarse->oSites(), 1, {
|
||||||
|
convertType(CoarseInner_[ss], TensorRemove(coarse_inner_[ss]));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class CComplex> // deprecate
|
||||||
inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||||
const Lattice<vobj> &fineX,
|
const Lattice<vobj> &fineX,
|
||||||
const Lattice<vobj> &fineY)
|
const Lattice<vobj> &fineY)
|
||||||
@ -167,12 +311,15 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
|||||||
// Precision promotion?
|
// Precision promotion?
|
||||||
fine_inner = localInnerProduct(fineX,fineY);
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
blockSum(coarse_inner,fine_inner);
|
blockSum(coarse_inner,fine_inner);
|
||||||
|
{
|
||||||
autoView( CoarseInner_ , CoarseInner, AcceleratorWrite);
|
autoView( CoarseInner_ , CoarseInner, AcceleratorWrite);
|
||||||
autoView( coarse_inner_ , coarse_inner, AcceleratorRead);
|
autoView( coarse_inner_ , coarse_inner, AcceleratorRead);
|
||||||
accelerator_for(ss, coarse->oSites(), 1, {
|
accelerator_for(ss, coarse->oSites(), 1, {
|
||||||
CoarseInner_[ss] = coarse_inner_[ss];
|
CoarseInner_[ss] = coarse_inner_[ss];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class vobj,class CComplex>
|
template<class vobj,class CComplex>
|
||||||
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
||||||
{
|
{
|
||||||
@ -229,6 +376,7 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vobj> &picked,Coordinate coor)
|
inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vobj> &picked,Coordinate coor)
|
||||||
{
|
{
|
||||||
@ -250,8 +398,8 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj,class CComplex>
|
template<class CComplex,class VLattice>
|
||||||
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis)
|
inline void blockOrthonormalize(Lattice<CComplex> &ip,VLattice &Basis)
|
||||||
{
|
{
|
||||||
GridBase *coarse = ip.Grid();
|
GridBase *coarse = ip.Grid();
|
||||||
GridBase *fine = Basis[0].Grid();
|
GridBase *fine = Basis[0].Grid();
|
||||||
@ -267,15 +415,22 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
|
|||||||
for(int v=0;v<nbasis;v++) {
|
for(int v=0;v<nbasis;v++) {
|
||||||
for(int u=0;u<v;u++) {
|
for(int u=0;u<v;u++) {
|
||||||
//Inner product & remove component
|
//Inner product & remove component
|
||||||
blockInnerProduct(ip,Basis[u],Basis[v]);
|
blockInnerProductD(ip,Basis[u],Basis[v]);
|
||||||
ip = -ip;
|
ip = -ip;
|
||||||
blockZAXPY<vobj,CComplex> (Basis[v],ip,Basis[u],Basis[v]);
|
blockZAXPY(Basis[v],ip,Basis[u],Basis[v]);
|
||||||
}
|
}
|
||||||
blockNormalise(ip,Basis[v]);
|
blockNormalise(ip,Basis[v]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class vobj,class CComplex>
|
||||||
|
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis) // deprecated inaccurate naming
|
||||||
|
{
|
||||||
|
blockOrthonormalize(ip,Basis);
|
||||||
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
// TODO: CPU optimized version here
|
||||||
template<class vobj,class CComplex,int nbasis>
|
template<class vobj,class CComplex,int nbasis>
|
||||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
Lattice<vobj> &fineData,
|
Lattice<vobj> &fineData,
|
||||||
@ -320,17 +475,17 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
template<class vobj,class CComplex,int nbasis>
|
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
Lattice<vobj> &fineData,
|
Lattice<vobj> &fineData,
|
||||||
const std::vector<Lattice<vobj> > &Basis)
|
const VLattice &Basis)
|
||||||
{
|
{
|
||||||
GridBase * fine = fineData.Grid();
|
GridBase * fine = fineData.Grid();
|
||||||
GridBase * coarse= coarseData.Grid();
|
GridBase * coarse= coarseData.Grid();
|
||||||
|
|
||||||
fineData=Zero();
|
fineData=Zero();
|
||||||
for(int i=0;i<nbasis;i++) {
|
for(int i=0;i<nbasis;i++) {
|
||||||
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
|
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
|
||||||
|
|
||||||
Lattice<CComplex> cip(coarse);
|
Lattice<CComplex> cip(coarse);
|
||||||
autoView( cip_ , cip, AcceleratorWrite);
|
autoView( cip_ , cip, AcceleratorWrite);
|
||||||
autoView( ip_ , ip, AcceleratorRead);
|
autoView( ip_ , ip, AcceleratorRead);
|
||||||
@ -407,6 +562,7 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
|||||||
Coordinate rdt = Tg->_rdimensions;
|
Coordinate rdt = Tg->_rdimensions;
|
||||||
Coordinate ist = Tg->_istride;
|
Coordinate ist = Tg->_istride;
|
||||||
Coordinate ost = Tg->_ostride;
|
Coordinate ost = Tg->_ostride;
|
||||||
|
|
||||||
autoView( t_v , To, AcceleratorWrite);
|
autoView( t_v , To, AcceleratorWrite);
|
||||||
autoView( f_v , From, AcceleratorRead);
|
autoView( f_v , From, AcceleratorRead);
|
||||||
accelerator_for(idx,Fg->lSites(),1,{
|
accelerator_for(idx,Fg->lSites(),1,{
|
||||||
|
@ -38,6 +38,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Transpose
|
// Transpose
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/*
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||||
Lattice<vobj> ret(lhs.Grid());
|
Lattice<vobj> ret(lhs.Grid());
|
||||||
@ -48,6 +49,7 @@ inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
|||||||
});
|
});
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Index level dependent transpose
|
// Index level dependent transpose
|
||||||
|
@ -341,7 +341,7 @@ class BinaryIO {
|
|||||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||||
int ieee32 = (format == std::string("IEEE32"));
|
int ieee32 = (format == std::string("IEEE32"));
|
||||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
int ieee64 = (format == std::string("IEEE64"));
|
int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
|
||||||
assert(ieee64||ieee32|ieee64big||ieee32big);
|
assert(ieee64||ieee32|ieee64big||ieee32big);
|
||||||
assert((ieee64+ieee32+ieee64big+ieee32big)==1);
|
assert((ieee64+ieee32+ieee64big+ieee32big)==1);
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -301,6 +301,30 @@ struct GaugeSimpleUnmunger {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct GaugeDoubleStoredMunger{
|
||||||
|
void operator()(fobj &in, sobj &out) {
|
||||||
|
for (int mu = 0; mu < Nds; mu++) {
|
||||||
|
for (int i = 0; i < Nc; i++) {
|
||||||
|
for (int j = 0; j < Nc; j++) {
|
||||||
|
out(mu)()(i, j) = in(mu)()(i, j);
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class fobj, class sobj>
|
||||||
|
struct GaugeDoubleStoredUnmunger {
|
||||||
|
void operator()(sobj &in, fobj &out) {
|
||||||
|
for (int mu = 0; mu < Nds; mu++) {
|
||||||
|
for (int i = 0; i < Nc; i++) {
|
||||||
|
for (int j = 0; j < Nc; j++) {
|
||||||
|
out(mu)()(i, j) = in(mu)()(i, j);
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
template<class fobj,class sobj>
|
template<class fobj,class sobj>
|
||||||
struct Gauge3x2munger{
|
struct Gauge3x2munger{
|
||||||
void operator() (fobj &in,sobj &out){
|
void operator() (fobj &in,sobj &out){
|
||||||
|
@ -146,7 +146,7 @@ public:
|
|||||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||||
int ieee32 = (format == std::string("IEEE32"));
|
int ieee32 = (format == std::string("IEEE32"));
|
||||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
int ieee64 = (format == std::string("IEEE64"));
|
int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
|
||||||
|
|
||||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||||
// depending on datatype, set up munger;
|
// depending on datatype, set up munger;
|
||||||
|
224
Grid/parallelIO/OpenQcdIO.h
Normal file
224
Grid/parallelIO/OpenQcdIO.h
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/parallelIO/OpenQcdIO.h
|
||||||
|
|
||||||
|
Copyright (C) 2015 - 2020
|
||||||
|
|
||||||
|
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
struct OpenQcdHeader : Serializable {
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(OpenQcdHeader,
|
||||||
|
int, Nt,
|
||||||
|
int, Nx,
|
||||||
|
int, Ny,
|
||||||
|
int, Nz,
|
||||||
|
double, plaq);
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenQcdIO : public BinaryIO {
|
||||||
|
public:
|
||||||
|
static constexpr double normalisationFactor = Nc; // normalisation difference: grid 18, openqcd 6
|
||||||
|
|
||||||
|
static inline int readHeader(std::string file, GridBase* grid, FieldMetaData& field) {
|
||||||
|
OpenQcdHeader header;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::ifstream fin(file, std::ios::in | std::ios::binary);
|
||||||
|
fin.read(reinterpret_cast<char*>(&header), sizeof(OpenQcdHeader));
|
||||||
|
assert(!fin.fail());
|
||||||
|
field.data_start = fin.tellg();
|
||||||
|
fin.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
header.plaq /= normalisationFactor;
|
||||||
|
|
||||||
|
// sanity check (should trigger on endian issues)
|
||||||
|
assert(0 < header.Nt && header.Nt <= 1024);
|
||||||
|
assert(0 < header.Nx && header.Nx <= 1024);
|
||||||
|
assert(0 < header.Ny && header.Ny <= 1024);
|
||||||
|
assert(0 < header.Nz && header.Nz <= 1024);
|
||||||
|
|
||||||
|
field.dimension[0] = header.Nx;
|
||||||
|
field.dimension[1] = header.Ny;
|
||||||
|
field.dimension[2] = header.Nz;
|
||||||
|
field.dimension[3] = header.Nt;
|
||||||
|
|
||||||
|
std::cout << GridLogDebug << "header: " << header << std::endl;
|
||||||
|
std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl;
|
||||||
|
std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl;
|
||||||
|
|
||||||
|
assert(grid->_ndimension == Nd);
|
||||||
|
for(int d = 0; d < Nd; d++)
|
||||||
|
assert(grid->_fdimensions[d] == field.dimension[d]);
|
||||||
|
|
||||||
|
field.plaquette = header.plaq;
|
||||||
|
|
||||||
|
return field.data_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vsimd>
|
||||||
|
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||||
|
FieldMetaData& header,
|
||||||
|
std::string file) {
|
||||||
|
typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubleStoredGaugeField;
|
||||||
|
|
||||||
|
assert(Ns == 4 and Nd == 4 and Nc == 3);
|
||||||
|
|
||||||
|
auto grid = dynamic_cast<GridCartesian*>(Umu.Grid());
|
||||||
|
assert(grid != nullptr); assert(grid->_ndimension == Nd);
|
||||||
|
|
||||||
|
uint64_t offset = readHeader(file, Umu.Grid(), header);
|
||||||
|
|
||||||
|
FieldMetaData clone(header);
|
||||||
|
|
||||||
|
std::string format("IEEE64"); // they always store little endian double precsision
|
||||||
|
uint32_t nersc_csum, scidac_csuma, scidac_csumb;
|
||||||
|
|
||||||
|
GridCartesian* grid_openqcd = createOpenQcdGrid(grid);
|
||||||
|
GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid);
|
||||||
|
|
||||||
|
typedef DoubleStoredColourMatrixD fobj;
|
||||||
|
typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj;
|
||||||
|
typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word;
|
||||||
|
|
||||||
|
word w = 0;
|
||||||
|
|
||||||
|
std::vector<fobj> iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here
|
||||||
|
std::vector<sobj> scalardata(grid->lSites());
|
||||||
|
|
||||||
|
IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC,
|
||||||
|
nersc_csum, scidac_csuma, scidac_csumb);
|
||||||
|
|
||||||
|
GridStopWatch timer;
|
||||||
|
timer.Start();
|
||||||
|
|
||||||
|
DoubleStoredGaugeField Umu_ds(grid);
|
||||||
|
|
||||||
|
auto munge = GaugeDoubleStoredMunger<DoubleStoredColourMatrixD, DoubleStoredColourMatrix>();
|
||||||
|
|
||||||
|
Coordinate ldim = grid->LocalDimensions();
|
||||||
|
thread_for(idx_g, grid->lSites(), {
|
||||||
|
Coordinate coor;
|
||||||
|
grid->LocalIndexToLocalCoor(idx_g, coor);
|
||||||
|
|
||||||
|
bool isOdd = grid_rb->CheckerBoard(coor) == Odd;
|
||||||
|
|
||||||
|
if(!isOdd) continue;
|
||||||
|
|
||||||
|
int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir]
|
||||||
|
+ coor[Xdir] * ldim[Ydir] * ldim[Zdir]
|
||||||
|
+ coor[Ydir] * ldim[Zdir]
|
||||||
|
+ coor[Zdir])/2;
|
||||||
|
|
||||||
|
munge(iodata[idx_o], scalardata[idx_g]);
|
||||||
|
});
|
||||||
|
|
||||||
|
grid->Barrier(); timer.Stop();
|
||||||
|
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl;
|
||||||
|
|
||||||
|
timer.Reset(); timer.Start();
|
||||||
|
|
||||||
|
vectorizeFromLexOrdArray(scalardata, Umu_ds);
|
||||||
|
|
||||||
|
grid->Barrier(); timer.Stop();
|
||||||
|
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl;
|
||||||
|
|
||||||
|
timer.Reset(); timer.Start();
|
||||||
|
|
||||||
|
undoDoubleStore(Umu, Umu_ds);
|
||||||
|
|
||||||
|
grid->Barrier(); timer.Stop();
|
||||||
|
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl;
|
||||||
|
|
||||||
|
GaugeStatistics(Umu, clone);
|
||||||
|
|
||||||
|
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
std::cout << GridLogMessage << "OpenQcd Configuration " << file
|
||||||
|
<< " plaquette " << clone.plaquette
|
||||||
|
<< " header " << header.plaquette
|
||||||
|
<< " difference " << plaq_diff
|
||||||
|
<< std::endl;
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
|
||||||
|
RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code
|
||||||
|
|
||||||
|
if(plaq_diff >= tol)
|
||||||
|
std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
|
||||||
|
assert(plaq_diff < tol);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vsimd>
|
||||||
|
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||||
|
std::string file) {
|
||||||
|
std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl;
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) {
|
||||||
|
// exploit GridCartesian to be able to still use IOobject
|
||||||
|
Coordinate gdim = grid->GlobalDimensions();
|
||||||
|
Coordinate ldim = grid->LocalDimensions();
|
||||||
|
Coordinate pcoor = grid->ThisProcessorCoor();
|
||||||
|
|
||||||
|
// openqcd does rb on the z direction
|
||||||
|
gdim[Zdir] /= 2;
|
||||||
|
ldim[Zdir] /= 2;
|
||||||
|
|
||||||
|
// and has the order T X Y Z (from slowest to fastest)
|
||||||
|
std::swap(gdim[Xdir], gdim[Zdir]);
|
||||||
|
std::swap(ldim[Xdir], ldim[Zdir]);
|
||||||
|
std::swap(pcoor[Xdir], pcoor[Zdir]);
|
||||||
|
|
||||||
|
GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid());
|
||||||
|
ret->_ldimensions = ldim;
|
||||||
|
ret->_processor_coor = pcoor;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vsimd>
|
||||||
|
static inline void undoDoubleStore(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||||
|
Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
|
||||||
|
conformable(Umu.Grid(), Umu_ds.Grid());
|
||||||
|
Lattice<iColourMatrix<vsimd>> U(Umu.Grid());
|
||||||
|
|
||||||
|
// they store T+, T-, X+, X-, Y+, Y-, Z+, Z-
|
||||||
|
for(int mu_g = 0; mu_g < Nd; ++mu_g) {
|
||||||
|
int mu_o = (mu_g + 1) % Nd;
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o)
|
||||||
|
+ Cshift(PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o + 1), mu_g, +1);
|
||||||
|
PokeIndex<LorentzIndex>(Umu, U, mu_g);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
281
Grid/parallelIO/OpenQcdIOChromaReference.h
Normal file
281
Grid/parallelIO/OpenQcdIOChromaReference.h
Normal file
@ -0,0 +1,281 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h
|
||||||
|
|
||||||
|
Copyright (C) 2015 - 2020
|
||||||
|
|
||||||
|
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <ios>
|
||||||
|
#include <iostream>
|
||||||
|
#include <limits>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <mpi.h>
|
||||||
|
#include <ostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;}
|
||||||
|
#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; }
|
||||||
|
// #undef CHECK
|
||||||
|
// #define CHECK
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
// Collective MPI-IO reader for openQCD gauge configurations.
// One instance per read: the constructor opens the file on the given
// communicator, the destructor closes it. All reads are collective
// (MPI_File_read_at_all), so every rank of `comm` must participate.
class ParRdr {
private:
  bool const swap; // byte-swap flag; currently always false (set in ctor)

  MPI_Status status;
  MPI_File fp; // collectively-opened file handle, closed in the destructor

  int err; // return code of the most recent MPI call

  // oddSiteType:  contiguous bytes of one odd site's worth of links
  //               (2*Nd ColourMatrixD in double precision).
  // fileViewType: subarray type describing this rank's portion of the
  //               global odd-site lattice inside the file.
  MPI_Datatype oddSiteType;
  MPI_Datatype fileViewType;

  GridBase* grid; // non-owning; supplies geometry (dimensions, rank coords)

public:
  // Opens `filename` read-only on communicator `comm`; aborts via assert on failure.
  ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr)
    : swap(false)
    , grid(gridPtr) {
    err = MPI_File_open(comm, const_cast<char*>(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp);
    assert(err == MPI_SUCCESS);
  }

  virtual ~ParRdr() { MPI_File_close(&fp); }

  // Decodes an MPI error code and prints class + message to stderr,
  // prefixed with the name of the calling function.
  inline void errInfo(int const err, std::string const& func) {
    static char estring[MPI_MAX_ERROR_STRING];
    int eclass = -1, len = 0;
    MPI_Error_class(err, &eclass);
    MPI_Error_string(err, estring, &len);
    std::cerr << func << " - Error " << eclass << ": " << estring << std::endl;
  }

  // Reads the openQCD header from the start of the file, sanity-checks it
  // against the grid geometry, and fills `field` (dimensions, plaquette,
  // data_start). Returns the byte offset at which gauge data begins.
  int readHeader(FieldMetaData& field) {
    assert((grid->_ndimension == Nd) && (Nd == 4));
    assert(Nc == 3);

    OpenQcdHeader header;

    readBlock(reinterpret_cast<char*>(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR);

    header.plaq /= 3.; // TODO change this into normalizationfactor

    // sanity check (should trigger on endian issues) TODO remove?
    assert(0 < header.Nt && header.Nt <= 1024);
    assert(0 < header.Nx && header.Nx <= 1024);
    assert(0 < header.Ny && header.Ny <= 1024);
    assert(0 < header.Nz && header.Nz <= 1024);

    field.dimension[0] = header.Nx;
    field.dimension[1] = header.Ny;
    field.dimension[2] = header.Nz;
    field.dimension[3] = header.Nt;

    // header geometry must match the grid we are reading into
    for(int d = 0; d < Nd; d++)
      assert(grid->FullDimensions()[d] == field.dimension[d]);

    field.plaquette = header.plaq;

    field.data_start = sizeof(OpenQcdHeader);

    return field.data_start;
  }

  // Collective read of `nbytes` elements of `datatype` at offset `pos`
  // into `dest`. NOTE: both `pos` and `nbytes` are in units of `datatype`
  // elements relative to the current file view, not raw bytes, despite the
  // parameter name. Asserts that the full count was delivered.
  void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) {
    err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status);
    errInfo(err, "MPI_File_read_at_all");
    // CHECK_VAR(err)

    int read = -1;
    MPI_Get_count(&status, datatype, &read);
    // CHECK_VAR(read)
    assert(nbytes == (uint64_t)read);
    assert(err == MPI_SUCCESS);
  }

  // Builds the two MPI datatypes used for the gauge-field read:
  // one odd site's links, and this rank's subarray of the global file.
  void createTypes() {
    constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd

    err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS);
    err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS);

    Coordinate const L = grid->GlobalDimensions();
    Coordinate const l = grid->LocalDimensions();
    Coordinate const i = grid->ThisProcessorCoor();

    // openQCD stores only the odd-parity sites, hence the /2 in the z-like
    // (fastest) dimension; the axis order below mirrors the file layout.
    Coordinate sizes({L[2] / 2, L[1], L[0], L[3]});
    Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]});
    Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]});

    err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS);
    err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS);
  }

  // Releases the datatypes created by createTypes(); must pair with it.
  void freeTypes() {
    err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS);
    err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS);
  }

  // Reads this rank's share of the gauge field into `domain_buff`
  // (resized to Nd * lSites ColourMatrixD) and fills `meta` from the header.
  // The read is chunked so that each collective call stays within the
  // int-valued MPI count limit. Always returns true (failures assert).
  bool readGauge(std::vector<ColourMatrixD>& domain_buff, FieldMetaData& meta) {
    auto hdr_offset = readHeader(meta);
    CHECK
    createTypes();
    // set the file view so element offsets address this rank's subarray only
    err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS);
    CHECK
    int const domainSites = grid->lSites();
    domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd

    // the actual READ
    constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex
    constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd
    constexpr uint64_t max_elems = std::numeric_limits<int>::max(); // int addressable elems: floor is fine
    uint64_t const n_os = domainSites / 2; // number of odd sites owned locally

    for(uint64_t os_idx = 0; os_idx < n_os;) {
      // chunk size: at most max_elems odd sites per collective call
      uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx;
      uint64_t const cm = os_idx * Nd * 2; // destination index in ColourMatrixD units
      readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType);
      os_idx += read_os;
    }

    CHECK
    // reset to the default byte view before releasing the datatypes
    err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL);
    errInfo(err, "MPI_File_set_view1");
    assert(err == MPI_SUCCESS);
    freeTypes();

    std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl;
    return true;
  }
};
|
||||||
|
|
||||||
|
// Reference reader for openQCD gauge configurations, cross-checked against
// the RQCD Chroma implementation. Reads the odd-site doubled-link file
// format via ParRdr, reconstructs the Grid LorentzColourMatrix field, and
// verifies the plaquette against the file header.
class OpenQcdIOChromaReference : public BinaryIO {
public:
  // Reads the configuration in `file` into `Umu` and fills `header`.
  // Aborts (assert) if the recomputed plaquette disagrees with the header
  // beyond a precision- and rank-count-dependent tolerance.
  template<class vsimd>
  static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
                                       Grid::FieldMetaData& header,
                                       std::string file) {
    typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubledGaugeField;

    assert(Ns == 4 and Nd == 4 and Nc == 3);

    auto grid = Umu.Grid();

    typedef ColourMatrixD fobj;

    std::vector<fobj> iodata(
      Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file

    {
      ParRdr rdr(MPI_COMM_WORLD, file, grid);
      rdr.readGauge(iodata, header);
    } // equivalent to using binaryio

    // scalar (non-SIMD) doubled-link field in lexicographic site order
    std::vector<iDoubleStoredColourMatrix<typename vsimd::scalar_type>> Umu_ds_scalar(grid->lSites());

    copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging

    DoubledGaugeField Umu_ds(grid);

    vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds);

    redistribute(Umu, Umu_ds); // equivalent to undoDoublestore

    // recompute gauge observables on a copy of the metadata for comparison
    FieldMetaData clone(header);

    GaugeStatistics(Umu, clone);

    RealD plaq_diff = fabs(clone.plaquette - header.plaquette);

    // clang-format off
    std::cout << GridLogMessage << "OpenQcd Configuration " << file
              << " plaquette " << clone.plaquette
              << " header " << header.plaquette
              << " difference " << plaq_diff
              << std::endl;
    // clang-format on

    // tolerance scales with sqrt(#ranks); taken from RQCD chroma code
    RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
    RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code

    if(plaq_diff >= tol)
      std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
    assert(plaq_diff < tol);

    std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
  }

private:
  // Rebuilds the Nd forward links per site from the doubled-stored field:
  // for Grid direction mu, the link is the sum of one Lorentz component of
  // Umu_ds and another component shifted by +1 in mu. Note the openQCD
  // direction mapping: components (2,3)->x, (4,5)->y, (6,7)->z, (0,1)->t.
  template<class vsimd>
  static inline void redistribute(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
                                  Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
    Grid::conformable(Umu.Grid(), Umu_ds.Grid());
    Lattice<iColourMatrix<vsimd>> U(Umu.Grid());

    U = PeekIndex<LorentzIndex>(Umu_ds, 2) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 3), 0, +1); PokeIndex<LorentzIndex>(Umu, U, 0);
    U = PeekIndex<LorentzIndex>(Umu_ds, 4) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 5), 1, +1); PokeIndex<LorentzIndex>(Umu, U, 1);
    U = PeekIndex<LorentzIndex>(Umu_ds, 6) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 7), 2, +1); PokeIndex<LorentzIndex>(Umu, U, 2);
    U = PeekIndex<LorentzIndex>(Umu_ds, 0) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 1), 3, +1); PokeIndex<LorentzIndex>(Umu, U, 3);
  }

  // Scatters the flat file buffer (2*Nd colour matrices per odd site, in
  // openQCD's t-x-y-z traversal order) into the lexicographically ordered
  // scalar lattice `u_fb`. Even-parity sites are skipped — the file stores
  // links only at odd sites.
  static inline void copyToLatticeObject(std::vector<DoubleStoredColourMatrix>& u_fb,
                                         std::vector<ColourMatrixD> const& node_buff,
                                         GridBase* grid) {
    assert(node_buff.size() == Nd * grid->lSites());

    Coordinate const& l = grid->LocalDimensions();

    // references into `coord` so the loops below update the coordinate in place
    Coordinate coord(Nd);
    int& x = coord[0];
    int& y = coord[1];
    int& z = coord[2];
    int& t = coord[3];

    int buff_idx = 0;
    for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering
      for(x = 0; x < l[0]; ++x)
        for(y = 0; y < l[1]; ++y)
          for(z = 0; z < l[2]; ++z) {
            if((t + z + y + x) % 2 == 0) continue; // only odd-parity sites are in the file

            int local_idx;
            Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions());
            // copy all 2*Nd (fwd+bwd per direction) colour matrices for this site
            for(int mu = 0; mu < 2 * Nd; ++mu)
              for(int c1 = 0; c1 < Nc; ++c1) {
                for(int c2 = 0; c2 < Nc; ++c2) {
                  u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2);
                }
              }
            buff_idx += 2 * Nd;
          }

    // every element of the buffer must have been consumed exactly once
    assert(node_buff.size() == buff_idx);
  }
};
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
@ -110,15 +110,15 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
accumulator = std::chrono::duration_cast<GridUsecs>(start-start);
|
accumulator = std::chrono::duration_cast<GridUsecs>(start-start);
|
||||||
}
|
}
|
||||||
GridTime Elapsed(void) {
|
GridTime Elapsed(void) const {
|
||||||
assert(running == false);
|
assert(running == false);
|
||||||
return std::chrono::duration_cast<GridTime>( accumulator );
|
return std::chrono::duration_cast<GridTime>( accumulator );
|
||||||
}
|
}
|
||||||
uint64_t useconds(void){
|
uint64_t useconds(void) const {
|
||||||
assert(running == false);
|
assert(running == false);
|
||||||
return (uint64_t) accumulator.count();
|
return (uint64_t) accumulator.count();
|
||||||
}
|
}
|
||||||
bool isRunning(void){
|
bool isRunning(void) const {
|
||||||
return running;
|
return running;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -40,8 +40,8 @@ public:
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const FermionField &in, FermionField &out);
|
virtual void M (const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||||
|
@ -41,8 +41,8 @@ public:
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const FermionField &in, FermionField &out);
|
virtual void M (const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// half checkerboard operaions
|
// half checkerboard operaions
|
||||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||||
|
@ -53,8 +53,8 @@ public:
|
|||||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const FermionField& in, FermionField& out);
|
virtual void M (const FermionField& in, FermionField& out);
|
||||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
virtual void Mdag (const FermionField& in, FermionField& out);
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||||
|
@ -58,8 +58,8 @@ public:
|
|||||||
virtual GridBase *GaugeRedBlackGrid(void) =0;
|
virtual GridBase *GaugeRedBlackGrid(void) =0;
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const FermionField &in, FermionField &out)=0;
|
virtual void M (const FermionField &in, FermionField &out)=0;
|
||||||
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
|
virtual void Mdag (const FermionField &in, FermionField &out)=0;
|
||||||
|
|
||||||
// half checkerboard operaions
|
// half checkerboard operaions
|
||||||
virtual void Meooe (const FermionField &in, FermionField &out)=0;
|
virtual void Meooe (const FermionField &in, FermionField &out)=0;
|
||||||
@ -86,7 +86,6 @@ public:
|
|||||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||||
|
|
||||||
|
|
||||||
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
|
@ -71,8 +71,8 @@ public:
|
|||||||
// override multiply; cut number routines if pass dagger argument
|
// override multiply; cut number routines if pass dagger argument
|
||||||
// and also make interface more uniformly consistent
|
// and also make interface more uniformly consistent
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
RealD M(const FermionField &in, FermionField &out);
|
void M(const FermionField &in, FermionField &out);
|
||||||
RealD Mdag(const FermionField &in, FermionField &out);
|
void Mdag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -74,8 +73,8 @@ public:
|
|||||||
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
||||||
|
|
||||||
// full checkerboard operations; leave unimplemented as abstract for now
|
// full checkerboard operations; leave unimplemented as abstract for now
|
||||||
RealD M (const FermionField &in, FermionField &out);
|
void M (const FermionField &in, FermionField &out);
|
||||||
RealD Mdag (const FermionField &in, FermionField &out);
|
void Mdag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
void Meooe (const FermionField &in, FermionField &out);
|
void Meooe (const FermionField &in, FermionField &out);
|
||||||
|
@ -56,8 +56,8 @@ public:
|
|||||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const FermionField& in, FermionField& out);
|
virtual void M (const FermionField& in, FermionField& out);
|
||||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
virtual void Mdag (const FermionField& in, FermionField& out);
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||||
|
@ -59,7 +59,7 @@ public:
|
|||||||
{
|
{
|
||||||
RealD eps = 1.0;
|
RealD eps = 1.0;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
|
// std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
|
||||||
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
||||||
assert(zdata->n==this->Ls);
|
assert(zdata->n==this->Ls);
|
||||||
|
|
||||||
|
@ -71,8 +71,8 @@ public:
|
|||||||
// override multiply; cut number routines if pass dagger argument
|
// override multiply; cut number routines if pass dagger argument
|
||||||
// and also make interface more uniformly consistent
|
// and also make interface more uniformly consistent
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
RealD M(const FermionField &in, FermionField &out);
|
void M(const FermionField &in, FermionField &out);
|
||||||
RealD Mdag(const FermionField &in, FermionField &out);
|
void Mdag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
|
@ -47,8 +47,8 @@ public:
|
|||||||
void M_internal(const FermionField &in, FermionField &out,int dag);
|
void M_internal(const FermionField &in, FermionField &out,int dag);
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const FermionField &in, FermionField &out);
|
virtual void M (const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// half checkerboard operaions
|
// half checkerboard operaions
|
||||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||||
|
@ -109,9 +109,8 @@ public:
|
|||||||
ImportGauge(_Umu);
|
ImportGauge(_Umu);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual RealD M(const FermionField &in, FermionField &out);
|
virtual void M(const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag(const FermionField &in, FermionField &out);
|
virtual void Mdag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
virtual void Mooee(const FermionField &in, FermionField &out);
|
virtual void Mooee(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeDag(const FermionField &in, FermionField &out);
|
virtual void MooeeDag(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv(const FermionField &in, FermionField &out);
|
virtual void MooeeInv(const FermionField &in, FermionField &out);
|
||||||
|
@ -78,8 +78,8 @@ public:
|
|||||||
// override multiply; cut number routines if pass dagger argument
|
// override multiply; cut number routines if pass dagger argument
|
||||||
// and also make interface more uniformly consistent
|
// and also make interface more uniformly consistent
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
virtual RealD M(const FermionField &in, FermionField &out);
|
virtual void M(const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag(const FermionField &in, FermionField &out);
|
virtual void Mdag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -99,8 +98,8 @@ public:
|
|||||||
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
||||||
|
|
||||||
// full checkerboard operations; leave unimplemented as abstract for now
|
// full checkerboard operations; leave unimplemented as abstract for now
|
||||||
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
virtual void M (const FermionField &in, FermionField &out){assert(0);};
|
||||||
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
virtual void Mdag (const FermionField &in, FermionField &out){assert(0);};
|
||||||
|
|
||||||
// half checkerboard operations; leave unimplemented as abstract for now
|
// half checkerboard operations; leave unimplemented as abstract for now
|
||||||
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
|
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
|
||||||
|
@ -120,7 +120,8 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual RealD M(const FermionField &in, FermionField &out) {
|
virtual void M(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
this->Dhop(in, out, DaggerNo);
|
this->Dhop(in, out, DaggerNo);
|
||||||
FermionField tmp(out.Grid());
|
FermionField tmp(out.Grid());
|
||||||
@ -129,11 +130,12 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
|||||||
ComplexD b(0.0,this->mu[s]);
|
ComplexD b(0.0,this->mu[s]);
|
||||||
axpbg5y_ssp(tmp,a,in,b,in,s,s);
|
axpbg5y_ssp(tmp,a,in,b,in,s,s);
|
||||||
}
|
}
|
||||||
return axpy_norm(out, 1.0, tmp, out);
|
axpy(out, 1.0, tmp, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
// needed for fast PV
|
// needed for fast PV
|
||||||
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
|
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu)
|
||||||
|
{
|
||||||
assert(_mass.size() == _mu.size());
|
assert(_mass.size() == _mu.size());
|
||||||
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
|
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
|
||||||
this->mass = _mass;
|
this->mass = _mass;
|
||||||
|
@ -323,7 +323,7 @@ void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
FermionField Din(psi.Grid());
|
FermionField Din(psi.Grid());
|
||||||
|
|
||||||
@ -335,11 +335,10 @@ RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
|||||||
axpby(chi,1.0,1.0,chi,psi);
|
axpby(chi,1.0,1.0,chi,psi);
|
||||||
|
|
||||||
M5D(psi,chi);
|
M5D(psi,chi);
|
||||||
return(norm2(chi));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
// Under adjoint
|
// Under adjoint
|
||||||
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
||||||
@ -354,7 +353,6 @@ RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
|||||||
M5Ddag(psi,chi);
|
M5Ddag(psi,chi);
|
||||||
// ((b D_W + D_w hop terms +1) on s-diag
|
// ((b D_W + D_w hop terms +1) on s-diag
|
||||||
axpby (chi,1.0,1.0,chi,psi);
|
axpby (chi,1.0,1.0,chi,psi);
|
||||||
return norm2(chi);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
|
@ -94,7 +94,7 @@ void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Ap
|
|||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
void ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
@ -116,15 +116,14 @@ RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, F
|
|||||||
}
|
}
|
||||||
sign=-sign;
|
sign=-sign;
|
||||||
}
|
}
|
||||||
return norm2(chi);
|
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
void ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
|
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
|
||||||
// The rest of matrix is symmetric.
|
// The rest of matrix is symmetric.
|
||||||
// Can ignore "dag"
|
// Can ignore "dag"
|
||||||
return M(psi,chi);
|
M(psi,chi);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
|
@ -89,7 +89,7 @@ void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionFiel
|
|||||||
/*****************************************************************************************************/
|
/*****************************************************************************************************/
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
void DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||||
{
|
{
|
||||||
FermionField Din(psi.Grid());
|
FermionField Din(psi.Grid());
|
||||||
|
|
||||||
@ -97,11 +97,10 @@ RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
|||||||
this->DW(Din, chi, DaggerNo);
|
this->DW(Din, chi, DaggerNo);
|
||||||
axpby(chi, 1.0, 1.0, chi, psi);
|
axpby(chi, 1.0, 1.0, chi, psi);
|
||||||
this->M5D(psi, chi);
|
this->M5D(psi, chi);
|
||||||
return(norm2(chi));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
void DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||||
{
|
{
|
||||||
FermionField Din(psi.Grid());
|
FermionField Din(psi.Grid());
|
||||||
|
|
||||||
@ -109,7 +108,6 @@ RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& c
|
|||||||
this->MeooeDag5D(Din, chi);
|
this->MeooeDag5D(Din, chi);
|
||||||
this->M5Ddag(psi, chi);
|
this->M5Ddag(psi, chi);
|
||||||
axpby(chi, 1.0, 1.0, chi, psi);
|
axpby(chi, 1.0, 1.0, chi, psi);
|
||||||
return(norm2(chi));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
|
@ -470,21 +470,24 @@ void ImprovedStaggeredFermion5D<Impl>::MdirAll(const FermionField &in, std::vect
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
return axpy_norm(out, mass, in, out);
|
axpy(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerYes);
|
Dhop(in, out, DaggerYes);
|
||||||
return axpy_norm(out, mass, in, out);
|
axpy(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
if (in.Checkerboard() == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerNo);
|
DhopEO(in, out, DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
@ -492,7 +495,8 @@ void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionFiel
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
if (in.Checkerboard() == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerYes);
|
DhopEO(in, out, DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
@ -501,27 +505,30 @@ void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionF
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
typename FermionField::scalar_type scal(mass);
|
typename FermionField::scalar_type scal(mass);
|
||||||
out = scal * in;
|
out = scal * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Mooee(in, out);
|
Mooee(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
out = (1.0 / (mass)) * in;
|
out = (1.0 / (mass)) * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
|
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,FermionField &out)
|
||||||
FermionField &out) {
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
MooeeInv(in, out);
|
MooeeInv(in, out);
|
||||||
}
|
}
|
||||||
|
@ -171,21 +171,24 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
|
|||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
return axpy_norm(out, mass, in, out);
|
axpy(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerYes);
|
Dhop(in, out, DaggerYes);
|
||||||
return axpy_norm(out, mass, in, out);
|
axpy(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
if (in.Checkerboard() == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerNo);
|
DhopEO(in, out, DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
@ -193,7 +196,8 @@ void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
if (in.Checkerboard() == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerYes);
|
DhopEO(in, out, DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
@ -202,27 +206,30 @@ void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionFie
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
typename FermionField::scalar_type scal(mass);
|
typename FermionField::scalar_type scal(mass);
|
||||||
out = scal * in;
|
out = scal * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Mooee(in, out);
|
Mooee(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
out = (1.0 / (mass)) * in;
|
out = (1.0 / (mass)) * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
|
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,FermionField &out)
|
||||||
FermionField &out) {
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
MooeeInv(in, out);
|
MooeeInv(in, out);
|
||||||
}
|
}
|
||||||
@ -234,7 +241,8 @@ void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||||
GaugeField & mat,
|
GaugeField & mat,
|
||||||
const FermionField &A, const FermionField &B, int dag) {
|
const FermionField &A, const FermionField &B, int dag)
|
||||||
|
{
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
@ -284,8 +292,8 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||||
|
{
|
||||||
conformable(U.Grid(), _grid);
|
conformable(U.Grid(), _grid);
|
||||||
conformable(U.Grid(), V.Grid());
|
conformable(U.Grid(), V.Grid());
|
||||||
conformable(U.Grid(), mat.Grid());
|
conformable(U.Grid(), mat.Grid());
|
||||||
@ -296,8 +304,8 @@ void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionFie
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||||
|
{
|
||||||
conformable(U.Grid(), _cbgrid);
|
conformable(U.Grid(), _cbgrid);
|
||||||
conformable(U.Grid(), V.Grid());
|
conformable(U.Grid(), V.Grid());
|
||||||
conformable(U.Grid(), mat.Grid());
|
conformable(U.Grid(), mat.Grid());
|
||||||
@ -310,8 +318,8 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionF
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||||
|
{
|
||||||
conformable(U.Grid(), _cbgrid);
|
conformable(U.Grid(), _cbgrid);
|
||||||
conformable(U.Grid(), V.Grid());
|
conformable(U.Grid(), V.Grid());
|
||||||
conformable(U.Grid(), mat.Grid());
|
conformable(U.Grid(), mat.Grid());
|
||||||
|
@ -166,7 +166,7 @@ void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& c
|
|||||||
/*****************************************************************************************************/
|
/*****************************************************************************************************/
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
void MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||||
{
|
{
|
||||||
FermionField Din(psi.Grid());
|
FermionField Din(psi.Grid());
|
||||||
|
|
||||||
@ -174,11 +174,10 @@ RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
|||||||
this->DW(Din, chi, DaggerNo);
|
this->DW(Din, chi, DaggerNo);
|
||||||
axpby(chi, 1.0, 1.0, chi, psi);
|
axpby(chi, 1.0, 1.0, chi, psi);
|
||||||
this->M5D(psi, chi);
|
this->M5D(psi, chi);
|
||||||
return(norm2(chi));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
void MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||||
{
|
{
|
||||||
FermionField Din(psi.Grid());
|
FermionField Din(psi.Grid());
|
||||||
|
|
||||||
@ -186,7 +185,6 @@ RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
|||||||
this->MeooeDag5D(Din, chi);
|
this->MeooeDag5D(Din, chi);
|
||||||
this->M5Ddag(psi, chi);
|
this->M5Ddag(psi, chi);
|
||||||
axpby(chi, 1.0, 1.0, chi, psi);
|
axpby(chi, 1.0, 1.0, chi, psi);
|
||||||
return(norm2(chi));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
|
@ -128,17 +128,17 @@ void NaiveStaggeredFermion<Impl>::ImportGauge(const GaugeField &_U)
|
|||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD NaiveStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
void NaiveStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
return axpy_norm(out, mass, in, out);
|
axpy(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD NaiveStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
void NaiveStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerYes);
|
Dhop(in, out, DaggerYes);
|
||||||
return axpy_norm(out, mass, in, out);
|
axpy(out, mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -269,16 +269,14 @@ void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, Fermi
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
|
void PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
M_internal(in,out,DaggerNo);
|
M_internal(in,out,DaggerNo);
|
||||||
return norm2(out);
|
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
|
void PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
M_internal(in,out,DaggerYes);
|
M_internal(in,out,DaggerYes);
|
||||||
return norm2(out);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
|
@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
// *NOT* EO
|
// *NOT* EO
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
|
void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
FermionField temp(out.Grid());
|
FermionField temp(out.Grid());
|
||||||
|
|
||||||
@ -47,11 +47,10 @@ RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
|
|||||||
Mooee(in, temp);
|
Mooee(in, temp);
|
||||||
|
|
||||||
out += temp;
|
out += temp;
|
||||||
return norm2(out);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
FermionField temp(out.Grid());
|
FermionField temp(out.Grid());
|
||||||
|
|
||||||
@ -63,7 +62,6 @@ RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
|||||||
MooeeDag(in, temp);
|
MooeeDag(in, temp);
|
||||||
|
|
||||||
out += temp;
|
out += temp;
|
||||||
return norm2(out);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -132,14 +130,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
|||||||
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
||||||
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
||||||
|
|
||||||
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
|
pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
|
||||||
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
|
pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
|
||||||
|
|
||||||
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
||||||
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
||||||
|
|
||||||
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
|
pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
|
||||||
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
|
pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -102,21 +102,24 @@ void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
|||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
return axpy_norm(out, diag_mass, in, out);
|
axpy(out, diag_mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerYes);
|
Dhop(in, out, DaggerYes);
|
||||||
return axpy_norm(out, diag_mass, in, out);
|
axpy(out, diag_mass, in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
if (in.Checkerboard() == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerNo);
|
DhopEO(in, out, DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
@ -125,7 +128,8 @@ void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
if (in.Checkerboard() == Odd) {
|
if (in.Checkerboard() == Odd) {
|
||||||
DhopEO(in, out, DaggerYes);
|
DhopEO(in, out, DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
@ -134,26 +138,30 @@ void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
typename FermionField::scalar_type scal(diag_mass);
|
typename FermionField::scalar_type scal(diag_mass);
|
||||||
out = scal * in;
|
out = scal * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Mooee(in, out);
|
Mooee(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
out = (1.0/(diag_mass))*in;
|
out = (1.0/(diag_mass))*in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
MooeeInv(in,out);
|
MooeeInv(in,out);
|
||||||
}
|
}
|
||||||
@ -249,7 +257,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||||
|
{
|
||||||
conformable(U.Grid(), _grid);
|
conformable(U.Grid(), _grid);
|
||||||
conformable(U.Grid(), V.Grid());
|
conformable(U.Grid(), V.Grid());
|
||||||
conformable(U.Grid(), mat.Grid());
|
conformable(U.Grid(), mat.Grid());
|
||||||
@ -260,7 +269,8 @@ void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||||
|
{
|
||||||
conformable(U.Grid(), _cbgrid);
|
conformable(U.Grid(), _cbgrid);
|
||||||
conformable(U.Grid(), V.Grid());
|
conformable(U.Grid(), V.Grid());
|
||||||
//conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
|
//conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
|
||||||
@ -274,7 +284,8 @@ void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, co
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||||
|
{
|
||||||
conformable(U.Grid(), _cbgrid);
|
conformable(U.Grid(), _cbgrid);
|
||||||
conformable(U.Grid(), V.Grid());
|
conformable(U.Grid(), V.Grid());
|
||||||
//conformable(U.Grid(), mat.Grid());
|
//conformable(U.Grid(), mat.Grid());
|
||||||
@ -287,7 +298,8 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||||
|
{
|
||||||
conformable(in.Grid(), _grid); // verifies full grid
|
conformable(in.Grid(), _grid); // verifies full grid
|
||||||
conformable(in.Grid(), out.Grid());
|
conformable(in.Grid(), out.Grid());
|
||||||
|
|
||||||
@ -297,7 +309,8 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||||
|
{
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -308,7 +321,8 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
|
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||||
|
{
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -386,7 +400,8 @@ template <class Impl>
|
|||||||
void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeField &U,
|
||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag) {
|
FermionField &out, int dag)
|
||||||
|
{
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
@ -436,7 +451,8 @@ template <class Impl>
|
|||||||
void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeField &U,
|
||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag) {
|
FermionField &out, int dag)
|
||||||
|
{
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
|
@ -447,19 +447,19 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
|||||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
||||||
#ifndef GRID_CUDA
|
#ifndef GRID_CUDA
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
||||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
|
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;}
|
||||||
#endif
|
#endif
|
||||||
} else if( interior ) {
|
} else if( interior ) {
|
||||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
|
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
|
||||||
#ifndef GRID_CUDA
|
#ifndef GRID_CUDA
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
|
||||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
|
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
|
||||||
#endif
|
#endif
|
||||||
} else if( exterior ) {
|
} else if( exterior ) {
|
||||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
||||||
#ifndef GRID_CUDA
|
#ifndef GRID_CUDA
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
||||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
|
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
assert(0 && " Kernel optimisation case not covered ");
|
assert(0 && " Kernel optimisation case not covered ");
|
||||||
|
@ -59,7 +59,7 @@ public:
|
|||||||
}
|
}
|
||||||
static inline GaugeLinkField
|
static inline GaugeLinkField
|
||||||
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
|
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
|
||||||
return Cshift(adj(Link), mu, -1);
|
return Cshift(closure(adj(Link)), mu, -1);
|
||||||
}
|
}
|
||||||
static inline GaugeLinkField
|
static inline GaugeLinkField
|
||||||
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
|
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
|
||||||
|
@ -39,6 +39,10 @@ directory
|
|||||||
#include <Grid/parallelIO/IldgIOtypes.h>
|
#include <Grid/parallelIO/IldgIOtypes.h>
|
||||||
#include <Grid/parallelIO/IldgIO.h>
|
#include <Grid/parallelIO/IldgIO.h>
|
||||||
#include <Grid/parallelIO/NerscIO.h>
|
#include <Grid/parallelIO/NerscIO.h>
|
||||||
|
#include <Grid/parallelIO/OpenQcdIO.h>
|
||||||
|
#if !defined(GRID_COMMS_NONE)
|
||||||
|
#include <Grid/parallelIO/OpenQcdIOChromaReference.h>
|
||||||
|
#endif
|
||||||
NAMESPACE_CHECK(Ildg);
|
NAMESPACE_CHECK(Ildg);
|
||||||
|
|
||||||
#include <Grid/qcd/hmc/checkpointers/CheckPointers.h>
|
#include <Grid/qcd/hmc/checkpointers/CheckPointers.h>
|
||||||
|
@ -80,6 +80,7 @@ static Registrar<OneFlavourRatioEOFModule<FermionImplementationPolicy>,
|
|||||||
|
|
||||||
static Registrar< ConjugateGradientModule<WilsonFermionR::FermionField>,
|
static Registrar< ConjugateGradientModule<WilsonFermionR::FermionField>,
|
||||||
HMC_SolverModuleFactory<solver_string, WilsonFermionR::FermionField, Serialiser> > __CGWFmodXMLInit("ConjugateGradient");
|
HMC_SolverModuleFactory<solver_string, WilsonFermionR::FermionField, Serialiser> > __CGWFmodXMLInit("ConjugateGradient");
|
||||||
|
|
||||||
static Registrar< BiCGSTABModule<WilsonFermionR::FermionField>,
|
static Registrar< BiCGSTABModule<WilsonFermionR::FermionField>,
|
||||||
HMC_SolverModuleFactory<solver_string, WilsonFermionR::FermionField, Serialiser> > __BiCGWFmodXMLInit("BiCGSTAB");
|
HMC_SolverModuleFactory<solver_string, WilsonFermionR::FermionField, Serialiser> > __BiCGWFmodXMLInit("BiCGSTAB");
|
||||||
static Registrar< ConjugateResidualModule<WilsonFermionR::FermionField>,
|
static Registrar< ConjugateResidualModule<WilsonFermionR::FermionField>,
|
||||||
|
@ -46,7 +46,7 @@ public:
|
|||||||
typedef typename SpinMatrixField::vector_object sobj;
|
typedef typename SpinMatrixField::vector_object sobj;
|
||||||
|
|
||||||
static const int epsilon[6][3] ;
|
static const int epsilon[6][3] ;
|
||||||
static const Complex epsilon_sgn[6];
|
static const Real epsilon_sgn[6];
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <class mobj, class robj>
|
template <class mobj, class robj>
|
||||||
@ -151,14 +151,18 @@ public:
|
|||||||
|
|
||||||
template <class FImpl>
|
template <class FImpl>
|
||||||
const int BaryonUtils<FImpl>::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}};
|
const int BaryonUtils<FImpl>::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}};
|
||||||
template <class FImpl>
|
/*template <class FImpl>
|
||||||
const Complex BaryonUtils<FImpl>::epsilon_sgn[6] = {Complex(1),
|
const Complex BaryonUtils<FImpl>::epsilon_sgn[6] = {Complex(1),
|
||||||
Complex(1),
|
Complex(1),
|
||||||
Complex(1),
|
Complex(1),
|
||||||
Complex(-1),
|
Complex(-1),
|
||||||
Complex(-1),
|
Complex(-1),
|
||||||
Complex(-1)};
|
Complex(-1)};
|
||||||
|
*/
|
||||||
|
template <class FImpl>
|
||||||
|
const Real BaryonUtils<FImpl>::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.};
|
||||||
|
|
||||||
|
//This is the old version
|
||||||
template <class FImpl>
|
template <class FImpl>
|
||||||
template <class mobj, class robj>
|
template <class mobj, class robj>
|
||||||
void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
|
void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
|
||||||
@ -174,11 +178,13 @@ void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
|
|||||||
{
|
{
|
||||||
|
|
||||||
Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4)
|
Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4)
|
||||||
|
|
||||||
auto gD1a = GammaA_left * GammaA_right * D1;
|
auto gD1a = GammaA_left * GammaA_right * D1;
|
||||||
auto gD1b = GammaA_left * g4 * GammaA_right * D1;
|
auto gD1b = GammaA_left * g4 * GammaA_right * D1;
|
||||||
auto pD1 = 0.5* (gD1a + (double)parity * gD1b);
|
auto pD1 = 0.5* (gD1a + (Real)parity * gD1b);
|
||||||
auto gD3 = GammaB_right * D3;
|
auto gD3 = GammaB_right * D3;
|
||||||
|
auto D2g = D2 * GammaB_left;
|
||||||
|
auto pD1g = pD1 * GammaB_left;
|
||||||
|
auto gD3g = gD3 * GammaB_left;
|
||||||
|
|
||||||
for (int ie_left=0; ie_left < 6 ; ie_left++){
|
for (int ie_left=0; ie_left < 6 ; ie_left++){
|
||||||
int a_left = epsilon[ie_left][0]; //a
|
int a_left = epsilon[ie_left][0]; //a
|
||||||
@ -188,59 +194,78 @@ void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
|
|||||||
int a_right = epsilon[ie_right][0]; //a'
|
int a_right = epsilon[ie_right][0]; //a'
|
||||||
int b_right = epsilon[ie_right][1]; //b'
|
int b_right = epsilon[ie_right][1]; //b'
|
||||||
int c_right = epsilon[ie_right][2]; //c'
|
int c_right = epsilon[ie_right][2]; //c'
|
||||||
|
Real ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right];
|
||||||
//This is the \delta_{456}^{123} part
|
//This is the \delta_{456}^{123} part
|
||||||
if (wick_contraction[0]){
|
if (wick_contraction[0]){
|
||||||
auto D2g = D2 * GammaB_left;
|
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
||||||
|
auto eepD1 = ee * pD1()(gamma_left,gamma_left)(c_right,c_left);
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
for (int beta_left=0; beta_left<Ns; beta_left++){
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
auto D2g_ab = D2g()(alpha_right,beta_left)(a_right,a_left);
|
||||||
result()()() += epsilon_sgn[ie_left] * epsilon_sgn[ie_right] * pD1()(gamma_left,gamma_left)(c_right,c_left)*D2g()(alpha_right,beta_left)(a_right,a_left)*gD3()(alpha_right,beta_left)(b_right,b_left);
|
auto gD3_ab = gD3()(alpha_right,beta_left)(b_right,b_left);
|
||||||
}}}
|
result()()() += eepD1*D2g_ab*gD3_ab;
|
||||||
|
}}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{231} part
|
//This is the \delta_{456}^{231} part
|
||||||
if (wick_contraction[1]){
|
if (wick_contraction[1]){
|
||||||
auto pD1g = pD1 * GammaB_left;
|
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
||||||
result()()() += epsilon_sgn[ie_left] * epsilon_sgn[ie_right] * pD1g()(gamma_left,beta_left)(c_right,a_left)*D2()(alpha_right,beta_left)(a_right,b_left)*gD3()(alpha_right,gamma_left)(b_right,c_left);
|
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
||||||
}}}
|
auto gD3_ag = gD3()(alpha_right,gamma_left)(b_right,c_left);
|
||||||
|
for (int beta_left=0; beta_left<Ns; beta_left++){
|
||||||
|
auto eepD1g_gb = ee * pD1g()(gamma_left,beta_left)(c_right,a_left);
|
||||||
|
auto D2_ab = D2()(alpha_right,beta_left)(a_right,b_left);
|
||||||
|
result()()() += eepD1g_gb*D2_ab*gD3_ag;
|
||||||
|
}
|
||||||
|
}}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{312} part
|
//This is the \delta_{456}^{312} part
|
||||||
if (wick_contraction[2]){
|
if (wick_contraction[2]){
|
||||||
auto gD3g = gD3 * GammaB_left;
|
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
||||||
result()()() += epsilon_sgn[ie_left] * epsilon_sgn[ie_right] * pD1()(gamma_left,beta_left)(c_right,b_left)*D2()(alpha_right,gamma_left)(a_right,c_left)*gD3g()(alpha_right,beta_left)(b_right,a_left);
|
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
||||||
}}}
|
auto D2_ag = D2()(alpha_right,gamma_left)(a_right,c_left);
|
||||||
|
for (int beta_left=0; beta_left<Ns; beta_left++){
|
||||||
|
auto eepD1_gb = ee * pD1()(gamma_left,beta_left)(c_right,b_left);
|
||||||
|
auto gD3g_ab = gD3g()(alpha_right,beta_left)(b_right,a_left);
|
||||||
|
result()()() += eepD1_gb*D2_ag*gD3g_ab;
|
||||||
|
}
|
||||||
|
}}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{132} part
|
//This is the \delta_{456}^{132} part
|
||||||
if (wick_contraction[3]){
|
if (wick_contraction[3]){
|
||||||
auto gD3g = gD3 * GammaB_left;
|
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
||||||
|
auto eepD1 = ee * pD1()(gamma_left,gamma_left)(c_right,c_left);
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
for (int beta_left=0; beta_left<Ns; beta_left++){
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
auto D2_ab = D2()(alpha_right,beta_left)(a_right,b_left);
|
||||||
result()()() -= epsilon_sgn[ie_left] * epsilon_sgn[ie_right] * pD1()(gamma_left,gamma_left)(c_right,c_left)*D2()(alpha_right,beta_left)(a_right,b_left)*gD3g()(alpha_right,beta_left)(b_right,a_left);
|
auto gD3g_ab = gD3g()(alpha_right,beta_left)(b_right,a_left);
|
||||||
}}}
|
result()()() -= eepD1*D2_ab*gD3g_ab;
|
||||||
|
}}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{321} part
|
//This is the \delta_{456}^{321} part
|
||||||
if (wick_contraction[4]){
|
if (wick_contraction[4]){
|
||||||
auto D2g = D2 * GammaB_left;
|
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
||||||
result()()() -= epsilon_sgn[ie_left] * epsilon_sgn[ie_right] * pD1()(gamma_left,beta_left)(c_right,b_left)*D2g()(alpha_right,beta_left)(a_right,a_left)*gD3()(alpha_right,gamma_left)(b_right,c_left);
|
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
||||||
}}}
|
auto gD3_ag = gD3()(alpha_right,gamma_left)(b_right,c_left);
|
||||||
|
for (int beta_left=0; beta_left<Ns; beta_left++){
|
||||||
|
auto eepD1_gb = ee * pD1()(gamma_left,beta_left)(c_right,b_left);
|
||||||
|
auto D2g_ab = D2g()(alpha_right,beta_left)(a_right,a_left);
|
||||||
|
result()()() -= eepD1_gb*D2g_ab*gD3_ag;
|
||||||
|
}
|
||||||
|
}}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{213} part
|
//This is the \delta_{456}^{213} part
|
||||||
if (wick_contraction[5]){
|
if (wick_contraction[5]){
|
||||||
auto pD1g = pD1 * GammaB_left;
|
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
||||||
result()()() -= epsilon_sgn[ie_left] * epsilon_sgn[ie_right] * pD1g()(gamma_left,beta_left)(c_right,a_left)*D2()(alpha_right,gamma_left)(a_right,c_left)*gD3()(alpha_right,beta_left)(b_right,b_left);
|
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
||||||
}}}
|
auto D2_ag = D2()(alpha_right,gamma_left)(a_right,c_left);
|
||||||
|
for (int beta_left=0; beta_left<Ns; beta_left++){
|
||||||
|
auto eepD1g_gb = ee * pD1g()(gamma_left,beta_left)(c_right,a_left);
|
||||||
|
auto gD3_ab = gD3()(alpha_right,beta_left)(b_right,b_left);
|
||||||
|
result()()() -= eepD1g_gb*D2_ag*gD3_ab;
|
||||||
|
}
|
||||||
|
}}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -259,6 +284,10 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
const int parity,
|
const int parity,
|
||||||
ComplexField &baryon_corr)
|
ComplexField &baryon_corr)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
||||||
|
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
||||||
|
|
||||||
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
|
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
|
||||||
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
||||||
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
||||||
@ -278,18 +307,32 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
autoView( v2 , q2_left, CpuRead);
|
autoView( v2 , q2_left, CpuRead);
|
||||||
autoView( v3 , q3_left, CpuRead);
|
autoView( v3 , q3_left, CpuRead);
|
||||||
|
|
||||||
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
Real bytes =0.;
|
||||||
thread_for(ss,grid->oSites(),{
|
bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
|
||||||
//for(int ss=0; ss < grid->oSites(); ss++){
|
for (int ie=0; ie < 6 ; ie++){
|
||||||
|
if(ie==0 or ie==3){
|
||||||
|
bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contraction[ie];
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contraction[ie];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Real t=0.;
|
||||||
|
t =-usecond();
|
||||||
|
|
||||||
|
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||||
auto D1 = v1[ss];
|
auto D1 = v1[ss];
|
||||||
auto D2 = v2[ss];
|
auto D2 = v2[ss];
|
||||||
auto D3 = v3[ss];
|
auto D3 = v3[ss];
|
||||||
|
|
||||||
vobj result=Zero();
|
vobj result=Zero();
|
||||||
baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
|
baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
|
||||||
vbaryon_corr[ss] = result;
|
vbaryon_corr[ss] = result;
|
||||||
} );//end loop over lattice sites
|
} );//end loop over lattice sites
|
||||||
|
|
||||||
|
t += usecond();
|
||||||
|
|
||||||
|
std::cout << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
template <class FImpl>
|
template <class FImpl>
|
||||||
template <class mobj, class robj>
|
template <class mobj, class robj>
|
||||||
@ -305,6 +348,10 @@ void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
|
|||||||
const int parity,
|
const int parity,
|
||||||
robj &result)
|
robj &result)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
||||||
|
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
||||||
|
|
||||||
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
|
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
|
||||||
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
||||||
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
||||||
@ -318,7 +365,7 @@ void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
|
|||||||
wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
|
wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
|
||||||
|
|
||||||
result=Zero();
|
result=Zero();
|
||||||
baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
|
baryon_site<decltype(D1),decltype(result)>(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
@ -558,6 +605,10 @@ void BaryonUtils<FImpl>::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop,
|
|||||||
const std::string op,
|
const std::string op,
|
||||||
SpinMatrixField &stn_corr)
|
SpinMatrixField &stn_corr)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
||||||
|
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
||||||
|
|
||||||
GridBase *grid = qs_ti.Grid();
|
GridBase *grid = qs_ti.Grid();
|
||||||
|
|
||||||
autoView( vcorr, stn_corr, CpuWrite);
|
autoView( vcorr, stn_corr, CpuWrite);
|
||||||
@ -565,8 +616,7 @@ void BaryonUtils<FImpl>::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop,
|
|||||||
autoView( vd_tf , qd_tf, CpuRead);
|
autoView( vd_tf , qd_tf, CpuRead);
|
||||||
autoView( vs_ti , qs_ti, CpuRead);
|
autoView( vs_ti , qs_ti, CpuRead);
|
||||||
|
|
||||||
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||||
thread_for(ss,grid->oSites(),{
|
|
||||||
auto Dq_loop = vq_loop[ss];
|
auto Dq_loop = vq_loop[ss];
|
||||||
auto Dd_tf = vd_tf[ss];
|
auto Dd_tf = vd_tf[ss];
|
||||||
auto Ds_ti = vs_ti[ss];
|
auto Ds_ti = vs_ti[ss];
|
||||||
@ -595,6 +645,10 @@ void BaryonUtils<FImpl>::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti,
|
|||||||
const std::string op,
|
const std::string op,
|
||||||
SpinMatrixField &stn_corr)
|
SpinMatrixField &stn_corr)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
||||||
|
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
||||||
|
|
||||||
GridBase *grid = qs_ti.Grid();
|
GridBase *grid = qs_ti.Grid();
|
||||||
|
|
||||||
autoView( vcorr , stn_corr, CpuWrite);
|
autoView( vcorr , stn_corr, CpuWrite);
|
||||||
|
@ -52,6 +52,26 @@ namespace PeriodicBC {
|
|||||||
tmp = adj(Link)*field;
|
tmp = adj(Link)*field;
|
||||||
return Cshift(tmp,mu,-1);// moves towards positive mu
|
return Cshift(tmp,mu,-1);// moves towards positive mu
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class gauge,typename Op, typename T1> auto
|
||||||
|
CovShiftForward(const Lattice<gauge> &Link,
|
||||||
|
int mu,
|
||||||
|
const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
|
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||||
|
{
|
||||||
|
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||||
|
return CovShiftForward(Link,mu,arg);
|
||||||
|
}
|
||||||
|
template<class gauge,typename Op, typename T1> auto
|
||||||
|
CovShiftBackward(const Lattice<gauge> &Link,
|
||||||
|
int mu,
|
||||||
|
const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
|
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||||
|
{
|
||||||
|
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||||
|
return CovShiftForward(Link,mu,arg);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -122,6 +142,26 @@ namespace ConjugateBC {
|
|||||||
return Cshift(tmp,mu,-1);// moves towards positive mu
|
return Cshift(tmp,mu,-1);// moves towards positive mu
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class gauge,typename Op, typename T1> auto
|
||||||
|
CovShiftForward(const Lattice<gauge> &Link,
|
||||||
|
int mu,
|
||||||
|
const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
|
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||||
|
{
|
||||||
|
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||||
|
return CovShiftForward(Link,mu,arg);
|
||||||
|
}
|
||||||
|
template<class gauge,typename Op, typename T1> auto
|
||||||
|
CovShiftBackward(const Lattice<gauge> &Link,
|
||||||
|
int mu,
|
||||||
|
const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
|
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||||
|
{
|
||||||
|
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||||
|
return CovShiftForward(Link,mu,arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -485,7 +485,7 @@ public:
|
|||||||
|
|
||||||
// Up staple ___ ___
|
// Up staple ___ ___
|
||||||
// | |
|
// | |
|
||||||
tmp = Cshift(adj(U[nu]), nu, -1);
|
tmp = Cshift(closure(adj(U[nu])), nu, -1);
|
||||||
tmp = adj(U2[mu]) * tmp;
|
tmp = adj(U2[mu]) * tmp;
|
||||||
tmp = Cshift(tmp, mu, -2);
|
tmp = Cshift(tmp, mu, -2);
|
||||||
|
|
||||||
@ -519,7 +519,7 @@ public:
|
|||||||
//
|
//
|
||||||
// | |
|
// | |
|
||||||
|
|
||||||
tmp = Cshift(adj(U2[nu]), nu, -2);
|
tmp = Cshift(closure(adj(U2[nu])), nu, -2);
|
||||||
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
|
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
|
||||||
tmp = U2[nu] * Cshift(tmp, nu, 2);
|
tmp = U2[nu] * Cshift(tmp, nu, 2);
|
||||||
Stap += Cshift(tmp, mu, 1);
|
Stap += Cshift(tmp, mu, 1);
|
||||||
|
@ -87,11 +87,7 @@ namespace Grid {
|
|||||||
template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
|
template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
|
||||||
struct is_tensor_fixed<Eigen::TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType>>
|
struct is_tensor_fixed<Eigen::TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType>>
|
||||||
: public std::true_type {};
|
: public std::true_type {};
|
||||||
template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType,
|
template<typename T> struct is_tensor_fixed<Eigen::TensorMap<T>> : public std::true_type {};
|
||||||
int MapOptions_, template <class> class MapPointer_>
|
|
||||||
struct is_tensor_fixed<Eigen::TensorMap<Eigen::TensorFixedSize<Scalar_, Dimensions_,
|
|
||||||
Options_, IndexType>, MapOptions_, MapPointer_>>
|
|
||||||
: public std::true_type {};
|
|
||||||
|
|
||||||
// Is this a variable-size Eigen tensor
|
// Is this a variable-size Eigen tensor
|
||||||
template<typename T, typename V = void> struct is_tensor_variable : public std::false_type {};
|
template<typename T, typename V = void> struct is_tensor_variable : public std::false_type {};
|
||||||
|
@ -114,7 +114,8 @@ THE SOFTWARE.
|
|||||||
#define GRID_MACRO_WRITE_MEMBER(A,B) ::Grid::write(WR,#B,obj. B);
|
#define GRID_MACRO_WRITE_MEMBER(A,B) ::Grid::write(WR,#B,obj. B);
|
||||||
|
|
||||||
#define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\
|
#define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\
|
||||||
std::string SerialisableClassName(void) const {return std::string(#cname);} \
|
static inline std::string SerialisableClassName(void) {return std::string(#cname);} \
|
||||||
|
static constexpr bool isEnum = false; \
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \
|
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \
|
||||||
@ -162,6 +163,8 @@ public:\
|
|||||||
public:\
|
public:\
|
||||||
accelerator name(void) : value_(undefname) {}; \
|
accelerator name(void) : value_(undefname) {}; \
|
||||||
accelerator name(int value): value_(value) {}; \
|
accelerator name(int value): value_(value) {}; \
|
||||||
|
static inline std::string SerialisableClassName(void) {return std::string(#name);}\
|
||||||
|
static constexpr bool isEnum = true; \
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void write(::Grid::Writer<T> &WR,const std::string &s, const name &obj) \
|
static inline void write(::Grid::Writer<T> &WR,const std::string &s, const name &obj) \
|
||||||
{\
|
{\
|
||||||
|
@ -432,12 +432,10 @@ namespace Grid {
|
|||||||
std::vector<T> strToVec(const std::string s)
|
std::vector<T> strToVec(const std::string s)
|
||||||
{
|
{
|
||||||
std::istringstream sstr(s);
|
std::istringstream sstr(s);
|
||||||
T buf;
|
|
||||||
std::vector<T> v;
|
std::vector<T> v;
|
||||||
|
|
||||||
while(!sstr.eof())
|
for(T buf; sstr >> buf;)
|
||||||
{
|
{
|
||||||
sstr >> buf;
|
|
||||||
v.push_back(buf);
|
v.push_back(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ Copyright (C) 2015
|
|||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -55,6 +56,7 @@ class GridTensorBase {};
|
|||||||
using Complexified = typename Traits::Complexified; \
|
using Complexified = typename Traits::Complexified; \
|
||||||
using Realified = typename Traits::Realified; \
|
using Realified = typename Traits::Realified; \
|
||||||
using DoublePrecision = typename Traits::DoublePrecision; \
|
using DoublePrecision = typename Traits::DoublePrecision; \
|
||||||
|
using DoublePrecision2= typename Traits::DoublePrecision2; \
|
||||||
static constexpr int TensorLevel = Traits::TensorLevel
|
static constexpr int TensorLevel = Traits::TensorLevel
|
||||||
|
|
||||||
template <class vtype>
|
template <class vtype>
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -194,6 +195,79 @@ auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decl
|
|||||||
ret._internal = innerProductD(lhs._internal,rhs._internal);
|
ret._internal = innerProductD(lhs._internal,rhs._internal);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////
|
||||||
|
// innerProductD2: precision promotion without inner sum
|
||||||
|
//////////////////////////////////////
|
||||||
|
|
||||||
|
accelerator_inline vComplexD2 TensorRemove(const vComplexD2 & x) { return x; };
|
||||||
|
accelerator_inline vRealD2 TensorRemove(const vRealD2 & x) { return x; };
|
||||||
|
|
||||||
|
accelerator_inline ComplexD innerProductD2(const ComplexF &l,const ComplexF &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline ComplexD innerProductD2(const ComplexD &l,const ComplexD &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline RealD innerProductD2(const RealD &l,const RealD &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline RealD innerProductD2(const RealF &l,const RealF &r){ return innerProduct(l,r); }
|
||||||
|
|
||||||
|
accelerator_inline vComplexD innerProductD2(const vComplexD &l,const vComplexD &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline vRealD innerProductD2(const vRealD &l,const vRealD &r){ return innerProduct(l,r); }
|
||||||
|
|
||||||
|
accelerator_inline vComplexD2 innerProductD2(const vComplexF &l,const vComplexF &r)
|
||||||
|
{
|
||||||
|
vComplexD la,lb;
|
||||||
|
vComplexD ra,rb;
|
||||||
|
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||||
|
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
|
||||||
|
vComplexD2 ret;
|
||||||
|
ret._internal[0] = innerProduct(la,ra);
|
||||||
|
ret._internal[1] = innerProduct(lb,rb);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
accelerator_inline vRealD2 innerProductD2(const vRealF &l,const vRealF &r)
|
||||||
|
{
|
||||||
|
vRealD la,lb;
|
||||||
|
vRealD ra,rb;
|
||||||
|
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||||
|
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
|
||||||
|
vRealD2 ret;
|
||||||
|
ret._internal[0]=innerProduct(la,ra);
|
||||||
|
ret._internal[1]=innerProduct(lb,rb);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now do it for vector, matrix, scalar
|
||||||
|
template<class l,class r,int N> accelerator_inline
|
||||||
|
auto innerProductD2 (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal[0],rhs._internal[0]))>
|
||||||
|
{
|
||||||
|
typedef decltype(innerProductD2(lhs._internal[0],rhs._internal[0])) ret_t;
|
||||||
|
iScalar<ret_t> ret;
|
||||||
|
zeroit(ret);
|
||||||
|
for(int c1=0;c1<N;c1++){
|
||||||
|
ret._internal += innerProductD2(lhs._internal[c1],rhs._internal[c1]);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template<class l,class r,int N> accelerator_inline
|
||||||
|
auto innerProductD2 (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0]))>
|
||||||
|
{
|
||||||
|
typedef decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
|
||||||
|
iScalar<ret_t> ret;
|
||||||
|
ret=Zero();
|
||||||
|
for(int c1=0;c1<N;c1++){
|
||||||
|
for(int c2=0;c2<N;c2++){
|
||||||
|
ret._internal+=innerProductD2(lhs._internal[c1][c2],rhs._internal[c1][c2]);
|
||||||
|
}}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template<class l,class r> accelerator_inline
|
||||||
|
auto innerProductD2 (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal,rhs._internal))>
|
||||||
|
{
|
||||||
|
typedef decltype(innerProductD2(lhs._internal,rhs._internal)) ret_t;
|
||||||
|
iScalar<ret_t> ret;
|
||||||
|
ret._internal = innerProductD2(lhs._internal,rhs._internal);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////
|
//////////////////////
|
||||||
// Keep same precison
|
// Keep same precison
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
@ -6,6 +6,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||||
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -37,6 +38,60 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
template<class T, int N> struct isGridTensor<iVector<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
template<class T, int N> struct isGridTensor<iVector<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
template<class T, int N> struct isGridTensor<iMatrix<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
template<class T, int N> struct isGridTensor<iMatrix<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
|
||||||
|
// Traits to identify scalars
|
||||||
|
template<typename T> struct isGridScalar : public std::false_type { static constexpr bool notvalue = true; };
|
||||||
|
template<class T> struct isGridScalar<iScalar<T>> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
|
||||||
|
// Store double-precision data in single-precision grids for precision promoted localInnerProductD
|
||||||
|
template<typename T>
|
||||||
|
class TypePair {
|
||||||
|
public:
|
||||||
|
T _internal[2];
|
||||||
|
TypePair<T>& operator=(const Grid::Zero& o) {
|
||||||
|
_internal[0] = Zero();
|
||||||
|
_internal[1] = Zero();
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
TypePair<T> operator+(const TypePair<T>& o) const {
|
||||||
|
TypePair<T> r;
|
||||||
|
r._internal[0] = _internal[0] + o._internal[0];
|
||||||
|
r._internal[1] = _internal[1] + o._internal[1];
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
TypePair<T>& operator+=(const TypePair<T>& o) {
|
||||||
|
_internal[0] += o._internal[0];
|
||||||
|
_internal[1] += o._internal[1];
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend accelerator_inline void add(TypePair<T>* ret, const TypePair<T>* a, const TypePair<T>* b) {
|
||||||
|
add(&ret->_internal[0],&a->_internal[0],&b->_internal[0]);
|
||||||
|
add(&ret->_internal[1],&a->_internal[1],&b->_internal[1]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
typedef TypePair<ComplexD> ComplexD2;
|
||||||
|
typedef TypePair<RealD> RealD2;
|
||||||
|
typedef TypePair<vComplexD> vComplexD2;
|
||||||
|
typedef TypePair<vRealD> vRealD2;
|
||||||
|
|
||||||
|
// Traits to identify fundamental data types
|
||||||
|
template<typename T> struct isGridFundamental : public std::false_type { static constexpr bool notvalue = true; };
|
||||||
|
template<> struct isGridFundamental<vComplexF> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<vComplexD> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<vRealF> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<vRealD> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<ComplexF> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<ComplexD> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<RealF> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<RealD> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<vComplexD2> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<vRealD2> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<ComplexD2> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
template<> struct isGridFundamental<RealD2> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////
|
||||||
// Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD.
|
// Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD.
|
||||||
// Use of a helper class like this allows us to template specialise and "dress"
|
// Use of a helper class like this allows us to template specialise and "dress"
|
||||||
@ -81,6 +136,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexF Complexified;
|
typedef ComplexF Complexified;
|
||||||
typedef RealF Realified;
|
typedef RealF Realified;
|
||||||
typedef RealD DoublePrecision;
|
typedef RealD DoublePrecision;
|
||||||
|
typedef RealD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
|
||||||
typedef RealD scalar_type;
|
typedef RealD scalar_type;
|
||||||
@ -93,6 +149,20 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexD Complexified;
|
typedef ComplexD Complexified;
|
||||||
typedef RealD Realified;
|
typedef RealD Realified;
|
||||||
typedef RealD DoublePrecision;
|
typedef RealD DoublePrecision;
|
||||||
|
typedef RealD DoublePrecision2;
|
||||||
|
};
|
||||||
|
template<> struct GridTypeMapper<RealD2> : public GridTypeMapper_Base {
|
||||||
|
typedef RealD2 scalar_type;
|
||||||
|
typedef RealD2 scalar_typeD;
|
||||||
|
typedef RealD2 vector_type;
|
||||||
|
typedef RealD2 vector_typeD;
|
||||||
|
typedef RealD2 tensor_reduced;
|
||||||
|
typedef RealD2 scalar_object;
|
||||||
|
typedef RealD2 scalar_objectD;
|
||||||
|
typedef ComplexD2 Complexified;
|
||||||
|
typedef RealD2 Realified;
|
||||||
|
typedef RealD2 DoublePrecision;
|
||||||
|
typedef RealD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
|
||||||
typedef ComplexF scalar_type;
|
typedef ComplexF scalar_type;
|
||||||
@ -105,6 +175,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexF Complexified;
|
typedef ComplexF Complexified;
|
||||||
typedef RealF Realified;
|
typedef RealF Realified;
|
||||||
typedef ComplexD DoublePrecision;
|
typedef ComplexD DoublePrecision;
|
||||||
|
typedef ComplexD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
|
||||||
typedef ComplexD scalar_type;
|
typedef ComplexD scalar_type;
|
||||||
@ -117,6 +188,20 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexD Complexified;
|
typedef ComplexD Complexified;
|
||||||
typedef RealD Realified;
|
typedef RealD Realified;
|
||||||
typedef ComplexD DoublePrecision;
|
typedef ComplexD DoublePrecision;
|
||||||
|
typedef ComplexD DoublePrecision2;
|
||||||
|
};
|
||||||
|
template<> struct GridTypeMapper<ComplexD2> : public GridTypeMapper_Base {
|
||||||
|
typedef ComplexD2 scalar_type;
|
||||||
|
typedef ComplexD2 scalar_typeD;
|
||||||
|
typedef ComplexD2 vector_type;
|
||||||
|
typedef ComplexD2 vector_typeD;
|
||||||
|
typedef ComplexD2 tensor_reduced;
|
||||||
|
typedef ComplexD2 scalar_object;
|
||||||
|
typedef ComplexD2 scalar_objectD;
|
||||||
|
typedef ComplexD2 Complexified;
|
||||||
|
typedef RealD2 Realified;
|
||||||
|
typedef ComplexD2 DoublePrecision;
|
||||||
|
typedef ComplexD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
|
||||||
typedef Integer scalar_type;
|
typedef Integer scalar_type;
|
||||||
@ -129,6 +214,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef void Complexified;
|
typedef void Complexified;
|
||||||
typedef void Realified;
|
typedef void Realified;
|
||||||
typedef void DoublePrecision;
|
typedef void DoublePrecision;
|
||||||
|
typedef void DoublePrecision2;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
|
||||||
@ -142,6 +228,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexF Complexified;
|
typedef vComplexF Complexified;
|
||||||
typedef vRealF Realified;
|
typedef vRealF Realified;
|
||||||
typedef vRealD DoublePrecision;
|
typedef vRealD DoublePrecision;
|
||||||
|
typedef vRealD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
|
||||||
typedef RealD scalar_type;
|
typedef RealD scalar_type;
|
||||||
@ -154,6 +241,20 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexD Complexified;
|
typedef vComplexD Complexified;
|
||||||
typedef vRealD Realified;
|
typedef vRealD Realified;
|
||||||
typedef vRealD DoublePrecision;
|
typedef vRealD DoublePrecision;
|
||||||
|
typedef vRealD DoublePrecision2;
|
||||||
|
};
|
||||||
|
template<> struct GridTypeMapper<vRealD2> : public GridTypeMapper_Base {
|
||||||
|
typedef RealD2 scalar_type;
|
||||||
|
typedef RealD2 scalar_typeD;
|
||||||
|
typedef vRealD2 vector_type;
|
||||||
|
typedef vRealD2 vector_typeD;
|
||||||
|
typedef vRealD2 tensor_reduced;
|
||||||
|
typedef RealD2 scalar_object;
|
||||||
|
typedef RealD2 scalar_objectD;
|
||||||
|
typedef vComplexD2 Complexified;
|
||||||
|
typedef vRealD2 Realified;
|
||||||
|
typedef vRealD2 DoublePrecision;
|
||||||
|
typedef vRealD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
|
||||||
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
||||||
@ -167,6 +268,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexH Complexified;
|
typedef vComplexH Complexified;
|
||||||
typedef vRealH Realified;
|
typedef vRealH Realified;
|
||||||
typedef vRealD DoublePrecision;
|
typedef vRealD DoublePrecision;
|
||||||
|
typedef vRealD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
|
||||||
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
||||||
@ -180,6 +282,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexH Complexified;
|
typedef vComplexH Complexified;
|
||||||
typedef vRealH Realified;
|
typedef vRealH Realified;
|
||||||
typedef vComplexD DoublePrecision;
|
typedef vComplexD DoublePrecision;
|
||||||
|
typedef vComplexD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
|
||||||
typedef ComplexF scalar_type;
|
typedef ComplexF scalar_type;
|
||||||
@ -192,6 +295,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexF Complexified;
|
typedef vComplexF Complexified;
|
||||||
typedef vRealF Realified;
|
typedef vRealF Realified;
|
||||||
typedef vComplexD DoublePrecision;
|
typedef vComplexD DoublePrecision;
|
||||||
|
typedef vComplexD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
|
||||||
typedef ComplexD scalar_type;
|
typedef ComplexD scalar_type;
|
||||||
@ -204,6 +308,20 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexD Complexified;
|
typedef vComplexD Complexified;
|
||||||
typedef vRealD Realified;
|
typedef vRealD Realified;
|
||||||
typedef vComplexD DoublePrecision;
|
typedef vComplexD DoublePrecision;
|
||||||
|
typedef vComplexD DoublePrecision2;
|
||||||
|
};
|
||||||
|
template<> struct GridTypeMapper<vComplexD2> : public GridTypeMapper_Base {
|
||||||
|
typedef ComplexD2 scalar_type;
|
||||||
|
typedef ComplexD2 scalar_typeD;
|
||||||
|
typedef vComplexD2 vector_type;
|
||||||
|
typedef vComplexD2 vector_typeD;
|
||||||
|
typedef vComplexD2 tensor_reduced;
|
||||||
|
typedef ComplexD2 scalar_object;
|
||||||
|
typedef ComplexD2 scalar_objectD;
|
||||||
|
typedef vComplexD2 Complexified;
|
||||||
|
typedef vRealD2 Realified;
|
||||||
|
typedef vComplexD2 DoublePrecision;
|
||||||
|
typedef vComplexD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
|
||||||
typedef Integer scalar_type;
|
typedef Integer scalar_type;
|
||||||
@ -216,6 +334,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef void Complexified;
|
typedef void Complexified;
|
||||||
typedef void Realified;
|
typedef void Realified;
|
||||||
typedef void DoublePrecision;
|
typedef void DoublePrecision;
|
||||||
|
typedef void DoublePrecision2;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define GridTypeMapper_RepeatedTypes \
|
#define GridTypeMapper_RepeatedTypes \
|
||||||
@ -234,6 +353,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
using Complexified = iScalar<typename BaseTraits::Complexified>;
|
using Complexified = iScalar<typename BaseTraits::Complexified>;
|
||||||
using Realified = iScalar<typename BaseTraits::Realified>;
|
using Realified = iScalar<typename BaseTraits::Realified>;
|
||||||
using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
|
using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
|
||||||
|
using DoublePrecision2= iScalar<typename BaseTraits::DoublePrecision2>;
|
||||||
static constexpr int Rank = BaseTraits::Rank + 1;
|
static constexpr int Rank = BaseTraits::Rank + 1;
|
||||||
static constexpr std::size_t count = BaseTraits::count;
|
static constexpr std::size_t count = BaseTraits::count;
|
||||||
static constexpr int Dimension(int dim) {
|
static constexpr int Dimension(int dim) {
|
||||||
@ -248,6 +368,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
using Complexified = iVector<typename BaseTraits::Complexified, N>;
|
using Complexified = iVector<typename BaseTraits::Complexified, N>;
|
||||||
using Realified = iVector<typename BaseTraits::Realified, N>;
|
using Realified = iVector<typename BaseTraits::Realified, N>;
|
||||||
using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
|
using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
|
||||||
|
using DoublePrecision2= iVector<typename BaseTraits::DoublePrecision2, N>;
|
||||||
static constexpr int Rank = BaseTraits::Rank + 1;
|
static constexpr int Rank = BaseTraits::Rank + 1;
|
||||||
static constexpr std::size_t count = BaseTraits::count * N;
|
static constexpr std::size_t count = BaseTraits::count * N;
|
||||||
static constexpr int Dimension(int dim) {
|
static constexpr int Dimension(int dim) {
|
||||||
@ -262,6 +383,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
using Complexified = iMatrix<typename BaseTraits::Complexified, N>;
|
using Complexified = iMatrix<typename BaseTraits::Complexified, N>;
|
||||||
using Realified = iMatrix<typename BaseTraits::Realified, N>;
|
using Realified = iMatrix<typename BaseTraits::Realified, N>;
|
||||||
using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;
|
using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;
|
||||||
|
using DoublePrecision2= iMatrix<typename BaseTraits::DoublePrecision2, N>;
|
||||||
static constexpr int Rank = BaseTraits::Rank + 2;
|
static constexpr int Rank = BaseTraits::Rank + 2;
|
||||||
static constexpr std::size_t count = BaseTraits::count * N * N;
|
static constexpr std::size_t count = BaseTraits::count * N * N;
|
||||||
static constexpr int Dimension(int dim) {
|
static constexpr int Dimension(int dim) {
|
||||||
|
@ -56,6 +56,7 @@ std::string GridCmdVectorIntToString(const VectorInt & vec);
|
|||||||
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
|
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
|
||||||
template<class VectorInt>
|
template<class VectorInt>
|
||||||
void GridCmdOptionIntVector(std::string &str,VectorInt & vec);
|
void GridCmdOptionIntVector(std::string &str,VectorInt & vec);
|
||||||
|
void GridCmdOptionInt(std::string &str,int & val);
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
|
@ -30,7 +30,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
|
|
||||||
std::vector<int> L_list;
|
std::vector<int> L_list;
|
||||||
std::vector<int> Ls_list;
|
std::vector<int> Ls_list;
|
||||||
std::vector<double> mflop_list;
|
std::vector<double> mflop_list;
|
||||||
@ -76,7 +75,6 @@ struct controls {
|
|||||||
int Opt;
|
int Opt;
|
||||||
int CommsOverlap;
|
int CommsOverlap;
|
||||||
Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
|
Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
|
||||||
// int HugePages;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class Benchmark {
|
class Benchmark {
|
||||||
@ -119,14 +117,15 @@ public:
|
|||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
comms_header();
|
comms_header();
|
||||||
|
|
||||||
for(int lat=4;lat<=maxlat;lat+=4){
|
for(int lat=16;lat<=maxlat;lat+=8){
|
||||||
for(int Ls=8;Ls<=8;Ls*=2){
|
// for(int Ls=8;Ls<=8;Ls*=2){
|
||||||
|
{ int Ls=12;
|
||||||
|
|
||||||
Coordinate latt_size ({lat*mpi_layout[0],
|
Coordinate latt_size ({lat*mpi_layout[0],
|
||||||
lat*mpi_layout[1],
|
lat*mpi_layout[1],
|
||||||
lat*mpi_layout[2],
|
lat*mpi_layout[2],
|
||||||
lat*mpi_layout[3]});
|
lat*mpi_layout[3]});
|
||||||
|
std::cout << GridLogMessage<< latt_size <<std::endl;
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
RealD Nrank = Grid._Nprocessors;
|
RealD Nrank = Grid._Nprocessors;
|
||||||
RealD Nnode = Grid.NodeCount();
|
RealD Nnode = Grid.NodeCount();
|
||||||
@ -184,9 +183,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
timestat.statistics(t_time);
|
timestat.statistics(t_time);
|
||||||
// for(int i=0;i<t_time.size();i++){
|
|
||||||
// std::cout << i<<" "<<t_time[i]<<std::endl;
|
|
||||||
// }
|
|
||||||
|
|
||||||
dbytes=dbytes*ppn;
|
dbytes=dbytes*ppn;
|
||||||
double xbytes = dbytes*0.5;
|
double xbytes = dbytes*0.5;
|
||||||
@ -200,8 +196,6 @@ public:
|
|||||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -227,14 +221,15 @@ public:
|
|||||||
uint64_t NN;
|
uint64_t NN;
|
||||||
|
|
||||||
|
|
||||||
uint64_t lmax=48;
|
uint64_t lmax=32;
|
||||||
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
||||||
|
|
||||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||||
for(int lat=8;lat<=lmax;lat+=4){
|
for(int lat=8;lat<=lmax;lat+=8){
|
||||||
|
|
||||||
Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
// NP= Grid.RankCount();
|
// NP= Grid.RankCount();
|
||||||
@ -270,191 +265,8 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#if 0
|
|
||||||
static double DWF5(int Ls,int L)
|
|
||||||
{
|
|
||||||
// RealD mass=0.1;
|
|
||||||
RealD M5 =1.8;
|
|
||||||
|
|
||||||
double mflops;
|
static double DWF(int Ls,int L)
|
||||||
double mflops_best = 0;
|
|
||||||
double mflops_worst= 0;
|
|
||||||
std::vector<double> mflops_all;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////
|
|
||||||
// Set/Get the layout & grid size
|
|
||||||
///////////////////////////////////////////////////////
|
|
||||||
int threads = GridThread::GetThreads();
|
|
||||||
Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4);
|
|
||||||
Coordinate local({L,L,L,L});
|
|
||||||
|
|
||||||
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({64,64,64,64}),
|
|
||||||
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
|
||||||
uint64_t NP = TmpGrid->RankCount();
|
|
||||||
uint64_t NN = TmpGrid->NodeCount();
|
|
||||||
NN_global=NN;
|
|
||||||
uint64_t SHM=NP/NN;
|
|
||||||
|
|
||||||
Coordinate internal;
|
|
||||||
if ( SHM == 1 ) internal = Coordinate({1,1,1,1});
|
|
||||||
else if ( SHM == 2 ) internal = Coordinate({2,1,1,1});
|
|
||||||
else if ( SHM == 4 ) internal = Coordinate({2,2,1,1});
|
|
||||||
else if ( SHM == 8 ) internal = Coordinate({2,2,2,1});
|
|
||||||
else assert(0);
|
|
||||||
|
|
||||||
Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
|
|
||||||
Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
|
|
||||||
|
|
||||||
///////// Welcome message ////////////
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
|
||||||
|
|
||||||
///////// Lattice Init ////////////
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
|
||||||
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
|
||||||
|
|
||||||
///////// RNG Init ////////////
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
|
||||||
std::vector<int> seeds5({5,6,7,8});
|
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
|
||||||
GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5);
|
|
||||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
|
||||||
|
|
||||||
///////// Source preparation ////////////
|
|
||||||
LatticeFermion src (sFGrid);
|
|
||||||
LatticeFermion tmp (sFGrid);
|
|
||||||
std::cout << GridLogMessage << "allocated src and tmp" << std::endl;
|
|
||||||
random(RNG5,src);
|
|
||||||
std::cout << GridLogMessage << "intialised random source" << std::endl;
|
|
||||||
|
|
||||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
|
||||||
src = src*N2;
|
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
|
||||||
|
|
||||||
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
|
||||||
LatticeFermion src_e (sFrbGrid);
|
|
||||||
LatticeFermion src_o (sFrbGrid);
|
|
||||||
LatticeFermion r_e (sFrbGrid);
|
|
||||||
LatticeFermion r_o (sFrbGrid);
|
|
||||||
LatticeFermion r_eo (sFGrid);
|
|
||||||
LatticeFermion err (sFGrid);
|
|
||||||
{
|
|
||||||
|
|
||||||
pickCheckerboard(Even,src_e,src);
|
|
||||||
pickCheckerboard(Odd,src_o,src);
|
|
||||||
|
|
||||||
#if defined(AVX512)
|
|
||||||
const int num_cases = 6;
|
|
||||||
std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
|
|
||||||
#else
|
|
||||||
const int num_cases = 4;
|
|
||||||
std::string fmt("U/S ; U/O ; G/S ; G/O ");
|
|
||||||
#endif
|
|
||||||
controls Cases [] = {
|
|
||||||
#ifdef AVX512
|
|
||||||
{ WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
{ WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
#endif
|
|
||||||
{ WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
{ WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
|
|
||||||
};
|
|
||||||
|
|
||||||
for(int c=0;c<num_cases;c++) {
|
|
||||||
|
|
||||||
WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
|
|
||||||
WilsonKernelsStatic::Opt = Cases[c].Opt;
|
|
||||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
|
||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
|
||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
|
||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
|
||||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
|
||||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
|
||||||
|
|
||||||
int nwarm = 100;
|
|
||||||
uint64_t ncall = 1000;
|
|
||||||
|
|
||||||
double t0=usecond();
|
|
||||||
sFGrid->Barrier();
|
|
||||||
for(int i=0;i<nwarm;i++){
|
|
||||||
sDw.DhopEO(src_o,r_e,DaggerNo);
|
|
||||||
}
|
|
||||||
sFGrid->Barrier();
|
|
||||||
double t1=usecond();
|
|
||||||
|
|
||||||
sDw.ZeroCounters();
|
|
||||||
time_statistics timestat;
|
|
||||||
std::vector<double> t_time(ncall);
|
|
||||||
for(uint64_t i=0;i<ncall;i++){
|
|
||||||
t0=usecond();
|
|
||||||
sDw.DhopEO(src_o,r_e,DaggerNo);
|
|
||||||
t1=usecond();
|
|
||||||
t_time[i] = t1-t0;
|
|
||||||
}
|
|
||||||
sFGrid->Barrier();
|
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
|
||||||
double flops=(1344.0*volume)/2;
|
|
||||||
double mf_hi, mf_lo, mf_err;
|
|
||||||
|
|
||||||
timestat.statistics(t_time);
|
|
||||||
mf_hi = flops/timestat.min;
|
|
||||||
mf_lo = flops/timestat.max;
|
|
||||||
mf_err= flops/timestat.min * timestat.err/timestat.mean;
|
|
||||||
|
|
||||||
mflops = flops/timestat.mean;
|
|
||||||
mflops_all.push_back(mflops);
|
|
||||||
if ( mflops_best == 0 ) mflops_best = mflops;
|
|
||||||
if ( mflops_worst== 0 ) mflops_worst= mflops;
|
|
||||||
if ( mflops>mflops_best ) mflops_best = mflops;
|
|
||||||
if ( mflops<mflops_worst) mflops_worst= mflops;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
|
|
||||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank "<< mflops/NP<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node "<< mflops/NN<<std::endl;
|
|
||||||
|
|
||||||
sDw.Report();
|
|
||||||
|
|
||||||
}
|
|
||||||
double robust = mflops_worst/mflops_best;;
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
|
|
||||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
|
|
||||||
std::cout<<GridLogMessage <<fmt << std::endl;
|
|
||||||
std::cout<<GridLogMessage;
|
|
||||||
|
|
||||||
for(int i=0;i<mflops_all.size();i++){
|
|
||||||
std::cout<<mflops_all[i]/NN<<" ; " ;
|
|
||||||
}
|
|
||||||
std::cout<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
return mflops_best;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static double DWF(int Ls,int L, double & robust)
|
|
||||||
{
|
{
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
@ -471,37 +283,30 @@ public:
|
|||||||
Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4);
|
Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4);
|
||||||
Coordinate local({L,L,L,L});
|
Coordinate local({L,L,L,L});
|
||||||
|
|
||||||
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({64,64,64,64}),
|
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({72,72,72,72}),
|
||||||
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
uint64_t NP = TmpGrid->RankCount();
|
uint64_t NP = TmpGrid->RankCount();
|
||||||
uint64_t NN = TmpGrid->NodeCount();
|
uint64_t NN = TmpGrid->NodeCount();
|
||||||
NN_global=NN;
|
NN_global=NN;
|
||||||
uint64_t SHM=NP/NN;
|
uint64_t SHM=NP/NN;
|
||||||
|
|
||||||
Coordinate internal;
|
Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]});
|
||||||
if ( SHM == 1 ) internal = Coordinate({1,1,1,1});
|
|
||||||
else if ( SHM == 2 ) internal = Coordinate({2,1,1,1});
|
|
||||||
else if ( SHM == 4 ) internal = Coordinate({2,2,1,1});
|
|
||||||
else if ( SHM == 8 ) internal = Coordinate({2,2,2,1});
|
|
||||||
else assert(0);
|
|
||||||
|
|
||||||
Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
|
|
||||||
Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
|
|
||||||
|
|
||||||
///////// Welcome message ////////////
|
///////// Welcome message ////////////
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
|
std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
|
||||||
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
|
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
|
||||||
std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
|
std::cout<<GridLogMessage << "* ranks : "<<NP <<std::endl;
|
||||||
std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
|
std::cout<<GridLogMessage << "* nodes : "<<NN <<std::endl;
|
||||||
std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
|
std::cout<<GridLogMessage << "* ranks/node : "<<SHM <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "* ranks geom : "<<GridCmdVectorIntToString(mpi)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
///////// Lattice Init ////////////
|
///////// Lattice Init ////////////
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
@ -514,76 +319,31 @@ public:
|
|||||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||||
|
|
||||||
|
typedef DomainWallFermionF Action;
|
||||||
|
typedef typename Action::FermionField Fermion;
|
||||||
|
typedef LatticeGaugeFieldF Gauge;
|
||||||
|
|
||||||
///////// Source preparation ////////////
|
///////// Source preparation ////////////
|
||||||
LatticeFermion src (FGrid); random(RNG5,src);
|
Gauge Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||||
LatticeFermion ref (FGrid);
|
Fermion src (FGrid); random(RNG5,src);
|
||||||
LatticeFermion tmp (FGrid);
|
Fermion src_e (FrbGrid);
|
||||||
|
Fermion src_o (FrbGrid);
|
||||||
|
Fermion r_e (FrbGrid);
|
||||||
|
Fermion r_o (FrbGrid);
|
||||||
|
Fermion r_eo (FGrid);
|
||||||
|
Action Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
|
||||||
std::cout<<GridLogMessage << "Normalising src "<< N2 <<std::endl;
|
|
||||||
src = src*N2;
|
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
|
||||||
|
|
||||||
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
|
||||||
|
|
||||||
////////////////////////////////////
|
|
||||||
// Naive wilson implementation
|
|
||||||
////////////////////////////////////
|
|
||||||
{
|
|
||||||
LatticeGaugeField Umu5d(FGrid);
|
|
||||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
|
||||||
{
|
|
||||||
autoView( Umu_v , Umu , CpuRead);
|
|
||||||
autoView( Umu5d_v, Umu5d, CpuWrite);
|
|
||||||
for(int ss=0;ss<Umu.Grid()->oSites();ss++){
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
Umu5d_v[Ls*ss+s] = Umu_v[ss];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ref = Zero();
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
|
||||||
}
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
|
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
|
||||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
|
||||||
tmp =Cshift(tmp,mu+1,-1);
|
|
||||||
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
|
||||||
}
|
|
||||||
ref = -0.5*ref;
|
|
||||||
}
|
|
||||||
|
|
||||||
LatticeFermion src_e (FrbGrid);
|
|
||||||
LatticeFermion src_o (FrbGrid);
|
|
||||||
LatticeFermion r_e (FrbGrid);
|
|
||||||
LatticeFermion r_o (FrbGrid);
|
|
||||||
LatticeFermion r_eo (FGrid);
|
|
||||||
LatticeFermion err (FGrid);
|
|
||||||
{
|
{
|
||||||
|
|
||||||
pickCheckerboard(Even,src_e,src);
|
pickCheckerboard(Even,src_e,src);
|
||||||
pickCheckerboard(Odd,src_o,src);
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
|
||||||
#if defined(AVX512)
|
|
||||||
const int num_cases = 6;
|
|
||||||
std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
|
|
||||||
#else
|
|
||||||
const int num_cases = 4;
|
const int num_cases = 4;
|
||||||
std::string fmt("U/S ; U/O ; G/S ; G/O ");
|
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||||
#endif
|
|
||||||
controls Cases [] = {
|
controls Cases [] = {
|
||||||
#ifdef AVX512
|
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent },
|
||||||
{ WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent },
|
||||||
{ WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
#endif
|
|
||||||
{ WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
{ WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
|
||||||
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||||
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
|
{ WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
|
||||||
};
|
};
|
||||||
@ -596,15 +356,12 @@ public:
|
|||||||
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
|
||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential Comms/Compute" <<std::endl;
|
||||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
|
||||||
int nwarm = 200;
|
int nwarm = 10;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
for(int i=0;i<nwarm;i++){
|
for(int i=0;i<nwarm;i++){
|
||||||
@ -612,9 +369,7 @@ public:
|
|||||||
}
|
}
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
// uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0);
|
uint64_t ncall = 50;
|
||||||
// if (ncall < 500) ncall = 500;
|
|
||||||
uint64_t ncall = 1000;
|
|
||||||
|
|
||||||
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||||
|
|
||||||
@ -651,24 +406,11 @@ public:
|
|||||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
|
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
|
||||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
|
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
|
||||||
|
|
||||||
Dw.Report();
|
|
||||||
|
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
|
||||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
|
||||||
setCheckerboard(r_eo,r_o);
|
|
||||||
setCheckerboard(r_eo,r_e);
|
|
||||||
err = r_eo-ref;
|
|
||||||
RealD absref = norm2(ref);
|
|
||||||
RealD abserr = norm2(err);
|
|
||||||
std::cout<<GridLogMessage << "norm diff "<< abserr << " / " << absref<<std::endl;
|
|
||||||
assert(abserr<1.0e-4);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
robust = mflops_worst/mflops_best;
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
|
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
|
||||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
|
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
|
||||||
std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
|
|
||||||
std::cout<<GridLogMessage <<fmt << std::endl;
|
std::cout<<GridLogMessage <<fmt << std::endl;
|
||||||
std::cout<<GridLogMessage ;
|
std::cout<<GridLogMessage ;
|
||||||
|
|
||||||
@ -682,8 +424,166 @@ public:
|
|||||||
return mflops_best;
|
return mflops_best;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static double Staggered(int L)
|
||||||
|
{
|
||||||
|
double mflops;
|
||||||
|
double mflops_best = 0;
|
||||||
|
double mflops_worst= 0;
|
||||||
|
std::vector<double> mflops_all;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Set/Get the layout & grid size
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4);
|
||||||
|
Coordinate local({L,L,L,L});
|
||||||
|
|
||||||
|
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({72,72,72,72}),
|
||||||
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
uint64_t NP = TmpGrid->RankCount();
|
||||||
|
uint64_t NN = TmpGrid->NodeCount();
|
||||||
|
NN_global=NN;
|
||||||
|
uint64_t SHM=NP/NN;
|
||||||
|
|
||||||
|
Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]});
|
||||||
|
|
||||||
|
///////// Welcome message ////////////
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Benchmark ImprovedStaggered on "<<L<<"^4 local volume "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "* ranks : "<<NP <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "* nodes : "<<NN <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "* ranks/node : "<<SHM <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "* ranks geom : "<<GridCmdVectorIntToString(mpi)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
|
||||||
|
///////// Lattice Init ////////////
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
|
||||||
|
|
||||||
|
///////// RNG Init ////////////
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
GridParallelRNG RNG4(FGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD c1=9.0/8.0;
|
||||||
|
RealD c2=-1.0/24.0;
|
||||||
|
RealD u0=1.0;
|
||||||
|
|
||||||
|
typedef ImprovedStaggeredFermionF Action;
|
||||||
|
typedef typename Action::FermionField Fermion;
|
||||||
|
typedef LatticeGaugeFieldF Gauge;
|
||||||
|
|
||||||
|
Gauge Umu(FGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||||
|
|
||||||
|
typename Action::ImplParams params;
|
||||||
|
Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params);
|
||||||
|
|
||||||
|
///////// Source preparation ////////////
|
||||||
|
Fermion src (FGrid); random(RNG4,src);
|
||||||
|
Fermion src_e (FrbGrid);
|
||||||
|
Fermion src_o (FrbGrid);
|
||||||
|
Fermion r_e (FrbGrid);
|
||||||
|
Fermion r_o (FrbGrid);
|
||||||
|
Fermion r_eo (FGrid);
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
pickCheckerboard(Even,src_e,src);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
|
||||||
|
const int num_cases = 4;
|
||||||
|
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||||
|
|
||||||
|
controls Cases [] = {
|
||||||
|
{ StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent },
|
||||||
|
{ StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent },
|
||||||
|
{ StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||||
|
{ StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
for(int c=0;c<num_cases;c++) {
|
||||||
|
|
||||||
|
StaggeredKernelsStatic::Comms = Cases[c].CommsOverlap;
|
||||||
|
StaggeredKernelsStatic::Opt = Cases[c].Opt;
|
||||||
|
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
if ( StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc StaggeredKernels" <<std::endl;
|
||||||
|
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||||
|
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential Comms/Compute" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
|
||||||
|
int nwarm = 10;
|
||||||
|
double t0=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
for(int i=0;i<nwarm;i++){
|
||||||
|
Ds.DhopEO(src_o,r_e,DaggerNo);
|
||||||
|
}
|
||||||
|
FGrid->Barrier();
|
||||||
|
double t1=usecond();
|
||||||
|
uint64_t ncall = 500;
|
||||||
|
|
||||||
|
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||||
|
|
||||||
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||||
|
Ds.ZeroCounters();
|
||||||
|
|
||||||
|
time_statistics timestat;
|
||||||
|
std::vector<double> t_time(ncall);
|
||||||
|
for(uint64_t i=0;i<ncall;i++){
|
||||||
|
t0=usecond();
|
||||||
|
Ds.DhopEO(src_o,r_e,DaggerNo);
|
||||||
|
t1=usecond();
|
||||||
|
t_time[i] = t1-t0;
|
||||||
|
}
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
|
double volume=1; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=(1146.0*volume)/2;
|
||||||
|
double mf_hi, mf_lo, mf_err;
|
||||||
|
|
||||||
|
timestat.statistics(t_time);
|
||||||
|
mf_hi = flops/timestat.min;
|
||||||
|
mf_lo = flops/timestat.max;
|
||||||
|
mf_err= flops/timestat.min * timestat.err/timestat.mean;
|
||||||
|
|
||||||
|
mflops = flops/timestat.mean;
|
||||||
|
mflops_all.push_back(mflops);
|
||||||
|
if ( mflops_best == 0 ) mflops_best = mflops;
|
||||||
|
if ( mflops_worst== 0 ) mflops_worst= mflops;
|
||||||
|
if ( mflops>mflops_best ) mflops_best = mflops;
|
||||||
|
if ( mflops<mflops_worst) mflops_worst= mflops;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << L<<"^4 Deo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << L<<"^4 Deo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<fmt << std::endl;
|
||||||
|
std::cout<<GridLogMessage ;
|
||||||
|
|
||||||
|
for(int i=0;i<mflops_all.size();i++){
|
||||||
|
std::cout<<mflops_all[i]/NN<<" ; " ;
|
||||||
|
}
|
||||||
|
std::cout<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
return mflops_best;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
@ -698,62 +598,50 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
int do_memory=1;
|
int do_memory=1;
|
||||||
int do_comms =1;
|
int do_comms =1;
|
||||||
int do_su3 =0;
|
|
||||||
int do_wilson=1;
|
|
||||||
int do_dwf =1;
|
|
||||||
|
|
||||||
if ( do_su3 ) {
|
|
||||||
// empty for now
|
|
||||||
}
|
|
||||||
#if 1
|
|
||||||
int sel=2;
|
int sel=2;
|
||||||
Coordinate L_list({8,12,16,24});
|
std::vector<int> L_list({16,24,32});
|
||||||
#else
|
|
||||||
int sel=1;
|
|
||||||
Coordinate L_list({8,12});
|
|
||||||
#endif
|
|
||||||
int selm1=sel-1;
|
int selm1=sel-1;
|
||||||
std::vector<double> robust_list;
|
|
||||||
|
|
||||||
std::vector<double> wilson;
|
std::vector<double> wilson;
|
||||||
std::vector<double> dwf4;
|
std::vector<double> dwf4;
|
||||||
std::vector<double> dwf5;
|
std::vector<double> staggered;
|
||||||
|
|
||||||
if ( do_wilson ) {
|
|
||||||
int Ls=1;
|
int Ls=1;
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl;
|
std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl;
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
for(int l=0;l<L_list.size();l++){
|
for(int l=0;l<L_list.size();l++){
|
||||||
double robust;
|
wilson.push_back(Benchmark::DWF(Ls,L_list[l]));
|
||||||
wilson.push_back(Benchmark::DWF(Ls,L_list[l],robust));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ls=16;
|
Ls=12;
|
||||||
if ( do_dwf ) {
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
|
std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
for(int l=0;l<L_list.size();l++){
|
for(int l=0;l<L_list.size();l++){
|
||||||
double robust;
|
double result = Benchmark::DWF(Ls,L_list[l]) ;
|
||||||
double result = Benchmark::DWF(Ls,L_list[l],robust) ;
|
|
||||||
dwf4.push_back(result);
|
dwf4.push_back(result);
|
||||||
robust_list.push_back(robust);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( do_dwf ) {
|
/*
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Improved Staggered dslash 4D vectorised" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
for(int l=0;l<L_list.size();l++){
|
||||||
|
double result = Benchmark::Staggered(L_list[l]) ;
|
||||||
|
staggered.push_back(result);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
|
std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 " <<std::endl;
|
std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\tt Staggered" <<std::endl;
|
||||||
for(int l=0;l<L_list.size();l++){
|
for(int l=0;l<L_list.size();l++){
|
||||||
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l] <<std::endl;
|
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t\t "<<dwf4[l] <<std::endl;
|
||||||
}
|
}
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
}
|
|
||||||
|
|
||||||
int NN=NN_global;
|
int NN=NN_global;
|
||||||
if ( do_memory ) {
|
if ( do_memory ) {
|
||||||
@ -770,7 +658,6 @@ int main (int argc, char ** argv)
|
|||||||
Benchmark::Comms();
|
Benchmark::Comms();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( do_dwf ) {
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
|
std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
@ -784,10 +671,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << " Comparison point result: " << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl;
|
std::cout<<GridLogMessage << " Comparison point result: " << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl;
|
||||||
std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl;
|
std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl;
|
||||||
std::cout<<std::setprecision(3);
|
std::cout<<std::setprecision(3);
|
||||||
std::cout<<GridLogMessage << " Comparison point robustness: " << robust_list[sel] <<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
176
benchmarks/Benchmark_schur.cc
Normal file
176
benchmarks/Benchmark_schur.cc
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_dwf.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
Gamma::Algebra Gmu [] = {
|
||||||
|
Gamma::Algebra::GammaX,
|
||||||
|
Gamma::Algebra::GammaY,
|
||||||
|
Gamma::Algebra::GammaZ,
|
||||||
|
Gamma::Algebra::GammaT
|
||||||
|
};
|
||||||
|
|
||||||
|
void benchDw(std::vector<int> & L, int Ls);
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
|
||||||
|
const int Ls=12;
|
||||||
|
std::vector< std::vector<int> > latts;
|
||||||
|
#if 1
|
||||||
|
latts.push_back(std::vector<int> ({24,24,24,24}) );
|
||||||
|
latts.push_back(std::vector<int> ({48,24,24,24}) );
|
||||||
|
latts.push_back(std::vector<int> ({96,24,24,24}) );
|
||||||
|
latts.push_back(std::vector<int> ({96,48,24,24}) );
|
||||||
|
// latts.push_back(std::vector<int> ({96,48,48,24}) );
|
||||||
|
// latts.push_back(std::vector<int> ({96,48,48,48}) );
|
||||||
|
#else
|
||||||
|
// latts.push_back(std::vector<int> ({96,48,48,48}) );
|
||||||
|
latts.push_back(std::vector<int> ({96,96,96,192}) );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t SchurDiagOne "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
|
||||||
|
for (int l=0;l<latts.size();l++){
|
||||||
|
std::vector<int> latt4 = latts[l];
|
||||||
|
std::cout << GridLogMessage <<"\t";
|
||||||
|
for(int d=0;d<Nd;d++){
|
||||||
|
std::cout<<latt4[d]<<"x";
|
||||||
|
}
|
||||||
|
std::cout <<Ls<<"\t" ;
|
||||||
|
benchDw (latt4,Ls);
|
||||||
|
}
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void benchDw(std::vector<int> & latt4, int Ls)
|
||||||
|
{
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// for Nc=3
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Dw : Ls*24*(7+48)= Ls*1320
|
||||||
|
//
|
||||||
|
// M5D: Ls*(4*2*Nc mul + 4*2*Nc madd ) = 3*4*2*Nc*Ls = Ls*72
|
||||||
|
// Meo: Ls*24*(7+48) + Ls*72 = Ls*1392
|
||||||
|
//
|
||||||
|
// Mee: 3*Ns*2*Nc*Ls // Chroma 6*N5*Nc*Ns
|
||||||
|
//
|
||||||
|
// LeemInv : 2*2*Nc*madd*Ls
|
||||||
|
// LeeInv : 2*2*Nc*madd*Ls
|
||||||
|
// DeeInv : 4*2*Nc*mul *Ls
|
||||||
|
// UeeInv : 2*2*Nc*madd*Ls
|
||||||
|
// UeemInv : 2*2*Nc*madd*Ls = Nc*Ls*(8+8+8+8+8) = 40*Nc*Ls// Chroma (10*N5 - 8)*Nc*Ns ~ (40 N5 - 32)Nc flops
|
||||||
|
// QUDA counts as dense LsxLs real matrix x Ls x NcNsNreim => Nc*4*2 x Ls^2 FMA = 16Nc Ls^2 flops
|
||||||
|
// Mpc => 1452*cbvol*2*Ls flops //
|
||||||
|
// => (1344+Ls*48)*Ls*cbvol*2 flops QUDA = 1920 @Ls=12 and 2112 @Ls=16
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
// long unsigned int single_site_flops = 8*Nc*(7+16*Nc)*Ls;
|
||||||
|
long unsigned int single_site_mpc_flops = 8*Nc*(7+16*Nc)*2*Ls + 40*Nc*2*Ls + 4*Nc*2*Ls;
|
||||||
|
long unsigned int single_site_quda_flops = 8*Nc*(7+16*Nc)*2*Ls + 16*Nc*Ls*Ls + 4*Nc*2*Ls;
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
|
||||||
|
|
||||||
|
ColourMatrixF cm = ComplexF(1.0,0.0);
|
||||||
|
|
||||||
|
int ncall=300;
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD M5 =1.8;
|
||||||
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
double volume=1; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
|
||||||
|
LatticeGaugeFieldF Umu(UGrid); Umu=Zero();
|
||||||
|
MobiusFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.5,0.5);
|
||||||
|
|
||||||
|
LatticeFermionF src_o (FrbGrid); src_o=1.0;
|
||||||
|
LatticeFermionF r_o (FrbGrid); r_o=Zero();
|
||||||
|
|
||||||
|
int order =151;
|
||||||
|
SchurDiagOneOperator<MobiusFermionF,LatticeFermionF> Mpc(Dw);
|
||||||
|
Chebyshev<LatticeFermionF> Cheby(0.0,60.0,order);
|
||||||
|
|
||||||
|
{
|
||||||
|
Mpc.Mpc(src_o,r_o);
|
||||||
|
Mpc.Mpc(src_o,r_o);
|
||||||
|
Mpc.Mpc(src_o,r_o);
|
||||||
|
|
||||||
|
double t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
Mpc.Mpc(src_o,r_o);
|
||||||
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
|
||||||
|
double flops=(single_site_mpc_flops*volume*ncall); // Mpc has 1 - Moo^-1 Moe Mee^-1 Meo so CB cancels.
|
||||||
|
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0);
|
||||||
|
flops=(single_site_quda_flops*volume*ncall);
|
||||||
|
std::cout <<"\t"<<flops/(t1-t0)<<"\t"<<(t1-t0)/1000./1000.<<" s\t";
|
||||||
|
|
||||||
|
// Cheby uses MpcDagMpc so 2x flops
|
||||||
|
for(int i=0;i<1;i++){
|
||||||
|
Cheby(Mpc,src_o,r_o);
|
||||||
|
t0=usecond();
|
||||||
|
Cheby(Mpc,src_o,r_o);
|
||||||
|
t1=usecond();
|
||||||
|
flops=(single_site_mpc_flops*volume*2*order);
|
||||||
|
std::cout <<"\t"<<flops/(t1-t0);
|
||||||
|
flops=(single_site_quda_flops*volume*2*order);
|
||||||
|
std::cout <<"\t"<<flops/(t1-t0) << "\t" << (t1-t0)/1000./1000. <<" s";
|
||||||
|
std::cout <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Dw.Report();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -87,6 +87,7 @@ int main (int argc, char ** argv)
|
|||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
}
|
}
|
||||||
|
ref = Zero();
|
||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD c1=9.0/8.0;
|
RealD c1=9.0/8.0;
|
||||||
|
17
configure.ac
17
configure.ac
@ -309,10 +309,21 @@ case ${ac_gen_scalar} in
|
|||||||
esac
|
esac
|
||||||
|
|
||||||
##################### Compiler dependent choices
|
##################### Compiler dependent choices
|
||||||
case ${CXX} in
|
|
||||||
|
#Strip any optional compiler arguments from nvcc call (eg -ccbin) for compiler comparison
|
||||||
|
CXXBASE=${CXX}
|
||||||
|
CXXTEST=${CXX}
|
||||||
|
if echo "${CXX}" | grep -q "nvcc"; then
|
||||||
|
CXXTEST="nvcc"
|
||||||
|
fi
|
||||||
|
|
||||||
|
case ${CXXTEST} in
|
||||||
nvcc)
|
nvcc)
|
||||||
CXX="nvcc -x cu "
|
# CXX="nvcc -keep -v -x cu "
|
||||||
CXXLD="nvcc -link"
|
# CXXLD="nvcc -v -link"
|
||||||
|
CXX="${CXXBASE} -x cu "
|
||||||
|
CXXLD="${CXXBASE} -link"
|
||||||
|
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
|
||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||||
if test $ac_openmp = yes; then
|
if test $ac_openmp = yes; then
|
||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
||||||
|
84
tests/IO/Test_openqcd_io.cc
Normal file
84
tests/IO/Test_openqcd_io.cc
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/io/Test_openqcd_io.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015 - 2020
|
||||||
|
|
||||||
|
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
#if !defined(GRID_COMMS_NONE)
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
|
||||||
|
auto mpi_layout = GridDefaultMpi();
|
||||||
|
auto latt_size = GridDefaultLatt();
|
||||||
|
|
||||||
|
GridCartesian grid(latt_size, simd_layout, mpi_layout);
|
||||||
|
|
||||||
|
GridParallelRNG pRNG(&grid);
|
||||||
|
|
||||||
|
pRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
|
||||||
|
|
||||||
|
LatticeGaugeField Umu_ref(&grid);
|
||||||
|
LatticeGaugeField Umu_me(&grid);
|
||||||
|
LatticeGaugeField Umu_diff(&grid);
|
||||||
|
|
||||||
|
FieldMetaData header_ref;
|
||||||
|
FieldMetaData header_me;
|
||||||
|
|
||||||
|
Umu_ref = Zero();
|
||||||
|
Umu_me = Zero();
|
||||||
|
|
||||||
|
std::string file("/home/daniel/configs/openqcd/test_16x8_pbcn6");
|
||||||
|
|
||||||
|
if(GridCmdOptionExists(argv, argv + argc, "--config")) {
|
||||||
|
file = GridCmdOptionPayload(argv, argv + argc, "--config");
|
||||||
|
std::cout << "file: " << file << std::endl;
|
||||||
|
assert(!file.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
OpenQcdIOChromaReference::readConfiguration(Umu_ref, header_ref, file);
|
||||||
|
OpenQcdIO::readConfiguration(Umu_me, header_me, file);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << header_ref << std::endl;
|
||||||
|
std::cout << GridLogMessage << header_me << std::endl;
|
||||||
|
|
||||||
|
Umu_diff = Umu_ref - Umu_me;
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "norm2(Umu_ref) = " << norm2(Umu_ref)
|
||||||
|
<< " norm2(Umu_me) = " << norm2(Umu_me)
|
||||||
|
<< " norm2(Umu_diff) = " << norm2(Umu_diff) << std::endl;
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
#endif
|
||||||
|
}
|
126
tests/Test_innerproduct_norm.cc
Normal file
126
tests/Test_innerproduct_norm.cc
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_innerproduct_norm.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
const int nIter = 100;
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
GridCartesian *Grid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
|
||||||
|
GridCartesian *Grid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
GridParallelRNG pRNG_d(Grid_d);
|
||||||
|
GridParallelRNG pRNG_f(Grid_f);
|
||||||
|
|
||||||
|
std::vector<int> seeds_d({1, 2, 3, 4});
|
||||||
|
std::vector<int> seeds_f({5, 6, 7, 8});
|
||||||
|
|
||||||
|
pRNG_d.SeedFixedIntegers(seeds_d);
|
||||||
|
pRNG_f.SeedFixedIntegers(seeds_f);
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
LatticeFermionD x_d(Grid_d); random(pRNG_d, x_d);
|
||||||
|
LatticeFermionD y_d(Grid_d); random(pRNG_d, y_d);
|
||||||
|
LatticeFermionF x_f(Grid_f); random(pRNG_f, x_f);
|
||||||
|
LatticeFermionF y_f(Grid_f); random(pRNG_f, y_f);
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
GridStopWatch sw_ref;
|
||||||
|
GridStopWatch sw_res;
|
||||||
|
|
||||||
|
{ // double precision
|
||||||
|
ComplexD ip_d_ref, ip_d_res, diff_ip_d;
|
||||||
|
RealD norm2_d_ref, norm2_d_res, diff_norm2_d;
|
||||||
|
|
||||||
|
sw_ref.Reset();
|
||||||
|
sw_ref.Start();
|
||||||
|
for(int i = 0; i < nIter; ++i) {
|
||||||
|
ip_d_ref = innerProduct(x_d, y_d);
|
||||||
|
norm2_d_ref = norm2(x_d);
|
||||||
|
}
|
||||||
|
sw_ref.Stop();
|
||||||
|
|
||||||
|
sw_res.Reset();
|
||||||
|
sw_res.Start();
|
||||||
|
for(int i = 0; i < nIter; ++i) { innerProductNorm(ip_d_res, norm2_d_res, x_d, y_d); }
|
||||||
|
sw_res.Stop();
|
||||||
|
|
||||||
|
diff_ip_d = ip_d_ref - ip_d_res;
|
||||||
|
diff_norm2_d = norm2_d_ref - norm2_d_res;
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
std::cout << GridLogMessage << "Double: ip_ref = " << ip_d_ref << " ip_res = " << ip_d_res << " diff = " << diff_ip_d << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Double: norm2_ref = " << norm2_d_ref << " norm2_res = " << norm2_d_res << " diff = " << diff_norm2_d << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Double: time_ref = " << sw_ref.Elapsed() << " time_res = " << sw_res.Elapsed() << std::endl;
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
assert(diff_ip_d == 0.);
|
||||||
|
assert(diff_norm2_d == 0.);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Double: all checks passed" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
{ // single precision
|
||||||
|
ComplexD ip_f_ref, ip_f_res, diff_ip_f;
|
||||||
|
RealD norm2_f_ref, norm2_f_res, diff_norm2_f;
|
||||||
|
|
||||||
|
sw_ref.Reset();
|
||||||
|
sw_ref.Start();
|
||||||
|
for(int i = 0; i < nIter; ++i) {
|
||||||
|
ip_f_ref = innerProduct(x_f, y_f);
|
||||||
|
norm2_f_ref = norm2(x_f);
|
||||||
|
}
|
||||||
|
sw_ref.Stop();
|
||||||
|
|
||||||
|
sw_res.Reset();
|
||||||
|
sw_res.Start();
|
||||||
|
for(int i = 0; i < nIter; ++i) { innerProductNorm(ip_f_res, norm2_f_res, x_f, y_f); }
|
||||||
|
sw_res.Stop();
|
||||||
|
|
||||||
|
diff_ip_f = ip_f_ref - ip_f_res;
|
||||||
|
diff_norm2_f = norm2_f_ref - norm2_f_res;
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
std::cout << GridLogMessage << "Single: ip_ref = " << ip_f_ref << " ip_res = " << ip_f_res << " diff = " << diff_ip_f << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Single: norm2_ref = " << norm2_f_ref << " norm2_res = " << norm2_f_res << " diff = " << diff_norm2_f << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Single: time_ref = " << sw_ref.Elapsed() << " time_res = " << sw_res.Elapsed() << std::endl;
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
assert(diff_ip_f == 0.);
|
||||||
|
assert(diff_norm2_f == 0.);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Single: all checks passed" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -238,11 +238,11 @@ void TestWhat(What & Ddwf,
|
|||||||
RealD t1,t2;
|
RealD t1,t2;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -218,11 +218,11 @@ int main (int argc, char ** argv)
|
|||||||
RealD t1,t2;
|
RealD t1,t2;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<DomainWallEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<DomainWallEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2);
|
HermOpEO.MpcDagMpc(chi_e, dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2);
|
HermOpEO.MpcDagMpc(chi_o, dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2);
|
HermOpEO.MpcDagMpc(phi_e, dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2);
|
HermOpEO.MpcDagMpc(phi_o, dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e, dchi_e);
|
pDce = innerProduct(phi_e, dchi_e);
|
||||||
pDco = innerProduct(phi_o, dchi_o);
|
pDco = innerProduct(phi_o, dchi_o);
|
||||||
|
@ -216,11 +216,11 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
|
|
||||||
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -201,11 +201,11 @@ int main (int argc, char ** argv)
|
|||||||
RealD t1,t2;
|
RealD t1,t2;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<GparityWilsonFermionR,FermionField> HermOpEO(Dw);
|
SchurDiagMooeeOperator<GparityWilsonFermionR,FermionField> HermOpEO(Dw);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -220,11 +220,11 @@ int main (int argc, char ** argv)
|
|||||||
RealD t1,t2;
|
RealD t1,t2;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<MobiusEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<MobiusEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2);
|
HermOpEO.MpcDagMpc(chi_e, dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2);
|
HermOpEO.MpcDagMpc(chi_o, dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2);
|
HermOpEO.MpcDagMpc(phi_e, dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2);
|
HermOpEO.MpcDagMpc(phi_o, dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e, dchi_e);
|
pDce = innerProduct(phi_e, dchi_e);
|
||||||
pDco = innerProduct(phi_o, dchi_o);
|
pDco = innerProduct(phi_o, dchi_o);
|
||||||
|
@ -266,11 +266,11 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
|
|
||||||
SchurDiagMooeeOperator<MobiusFermionR,LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<MobiusFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -270,11 +270,11 @@ int main (int argc, char ** argv)
|
|||||||
pickCheckerboard(Odd ,phi_o,phi);
|
pickCheckerboard(Odd ,phi_o,phi);
|
||||||
|
|
||||||
SchurDiagMooeeOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
|
SchurDiagMooeeOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -290,11 +290,11 @@ int main (int argc, char ** argv)
|
|||||||
pickCheckerboard(Odd ,phi_o,phi);
|
pickCheckerboard(Odd ,phi_o,phi);
|
||||||
|
|
||||||
SchurDiagMooeeOperator<ImprovedStaggeredFermion5DR,FermionField> HermOpEO(Ds);
|
SchurDiagMooeeOperator<ImprovedStaggeredFermion5DR,FermionField> HermOpEO(Ds);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -207,11 +207,11 @@ int main (int argc, char ** argv)
|
|||||||
RealD t1,t2;
|
RealD t1,t2;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
|
SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -208,11 +208,11 @@ int main (int argc, char ** argv)
|
|||||||
RealD t1,t2;
|
RealD t1,t2;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<WilsonTMFermionR,LatticeFermion> HermOpEO(Dw);
|
SchurDiagMooeeOperator<WilsonTMFermionR,LatticeFermion> HermOpEO(Dw);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -280,11 +280,11 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
|
|
||||||
SchurDiagMooeeOperator<ZMobiusFermionR,LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<ZMobiusFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||||
|
|
||||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||||
|
|
||||||
pDce = innerProduct(phi_e,dchi_e);
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
pDco = innerProduct(phi_o,dchi_o);
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
@ -70,9 +70,6 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
SU3::HotConfiguration(RNG4,Umu);
|
SU3::HotConfiguration(RNG4,Umu);
|
||||||
|
|
||||||
TrivialPrecon<LatticeFermion> simple;
|
|
||||||
|
|
||||||
PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-6,10000,simple,4,160);
|
|
||||||
|
|
||||||
ConjugateResidual<LatticeFermion> CR(1.0e-6,10000);
|
ConjugateResidual<LatticeFermion> CR(1.0e-6,10000);
|
||||||
|
|
||||||
@ -86,15 +83,19 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage<<"* Solving with MdagM VPGCR "<<std::endl;
|
std::cout<<GridLogMessage<<"* Solving with MdagM VPGCR "<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
||||||
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||||
|
TrivialPrecon<LatticeFermion> simple;
|
||||||
|
PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-6,10000,HermOp,simple,4,160);
|
||||||
|
|
||||||
result=Zero();
|
result=Zero();
|
||||||
PGCR(HermOp,src,result);
|
PGCR(src,result);
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"* Solving with g5-VPGCR "<<std::endl;
|
std::cout<<GridLogMessage<<"* Solving with g5-VPGCR "<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
||||||
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> g5HermOp(Ddwf);
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> g5HermOp(Ddwf);
|
||||||
|
PrecGeneralisedConjugateResidual<LatticeFermion> PGCR5(1.0e-6,10000,g5HermOp,simple,4,160);
|
||||||
result=Zero();
|
result=Zero();
|
||||||
PGCR(g5HermOp,src,result);
|
PGCR5(src,result);
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"* Solving with MdagM-CR "<<std::endl;
|
std::cout<<GridLogMessage<<"* Solving with MdagM-CR "<<std::endl;
|
||||||
|
@ -128,9 +128,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
||||||
HermOp4d.Report();
|
|
||||||
}
|
}
|
||||||
Ds4d.Report();
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
@ -148,9 +146,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
||||||
HermOp.Report();
|
|
||||||
}
|
}
|
||||||
Ds.Report();
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
@ -167,10 +163,8 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
||||||
HermOp.Report();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ds.Report();
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
@ -187,9 +181,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
||||||
HermOp.Report();
|
|
||||||
}
|
}
|
||||||
Ds.Report();
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
@ -206,9 +198,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
||||||
HermOp.Report();
|
|
||||||
}
|
}
|
||||||
Ds.Report();
|
|
||||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
|
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
|
||||||
@ -232,7 +222,6 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
||||||
// HermOp4d.Report();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -220,7 +220,7 @@ void run(const TestParams ¶ms){
|
|||||||
|
|
||||||
GridStopWatch CGTimer;
|
GridStopWatch CGTimer;
|
||||||
|
|
||||||
typename RunParamsOuter::HermOpType<MobiusFermionD> HermOpEO_outer(D_outer);
|
typename RunParamsOuter::template HermOpType<MobiusFermionD> HermOpEO_outer(D_outer);
|
||||||
|
|
||||||
CGTimer.Start();
|
CGTimer.Start();
|
||||||
CG_outer(HermOpEO_outer, src_o_outer, result_o_outer);
|
CG_outer(HermOpEO_outer, src_o_outer, result_o_outer);
|
||||||
|
Loading…
Reference in New Issue
Block a user