mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-14 22:07:05 +01:00
Compare commits
126 Commits
feature/hd
...
feature/ei
Author | SHA1 | Date | |
---|---|---|---|
86a9cc8c27 | |||
c5c2dbc0ce | |||
5aa60be17d | |||
2e652431e5 | |||
8b5b55b682 | |||
0e3c49f687 | |||
cb7ee37562 | |||
82f71643a4 | |||
ea08f193e7 | |||
2bb2c68e15 | |||
efe5bc6a3c | |||
384da487bd | |||
ee1de82a53 | |||
2b576fc185 | |||
b01b7f761a | |||
c83471bfd0 | |||
ab0c5d77fb | |||
779e3c7442 | |||
0c570824f2 | |||
0dd1bdfa94 | |||
1d65e2f62c | |||
93920c4811 | |||
6859a3e1d4 | |||
21ca182c36 | |||
053b4dd495 | |||
42bb5f0721 | |||
253bcc3426 | |||
a887206413 | |||
591ebb6213 | |||
56e2f7d088 | |||
525418abfb | |||
5f780806c2 | |||
3c6ffcb48c | |||
87984ece7d | |||
e9b295f967 | |||
224cbf0453 | |||
c1e57d4357 | |||
6b64727161 | |||
04863f8f38 | |||
04927d2e40 | |||
7caed4edd9 | |||
59c51d2c35 | |||
ff53b231c8 | |||
fc19cf905b | |||
2a1387e992 | |||
9bfa51bffb | |||
38532753f4 | |||
949be9605c | |||
63cf201ee7 | |||
c8af498a2a | |||
ddb192bac7 | |||
7666300a6f | |||
4a4b9e305d | |||
9b2d2d0fc3 | |||
5011753f4f | |||
dbaeefaeef | |||
dee96cbf82 | |||
dd3ebc2ce4 | |||
103e7ae2f0 | |||
29ae5615c0 | |||
6240e02619 | |||
f4033ad8cb | |||
f1fe444d4f | |||
dae820aa96 | |||
5daf176f4a | |||
e96c86ec14 | |||
c2c3cad20d | |||
edec9ee2e2 | |||
ed70cce542 | |||
4701201b5f | |||
0782b76ed4 | |||
0896f2cead | |||
181709bba4 | |||
091d5c605e | |||
90229cfb0f | |||
0475c46ecb | |||
3cca10e617 | |||
327da332bb | |||
43dc2814dd | |||
f3a8d039a2 | |||
4e864e56c9 | |||
014dbfa464 | |||
3b0e07882f | |||
8e81a811d0 | |||
96e8e44fd4 | |||
5fc8a273e7 | |||
d671a63e78 | |||
856d168e41 | |||
6235c7ba98 | |||
7e13724882 | |||
b6cbdd2aa3 | |||
a2188ea875 | |||
989af65807 | |||
60db3133d3 | |||
c9b737a4e7 | |||
037bb6ea73 | |||
05ebc458e2 | |||
3753508957 | |||
c1677fccf6 | |||
35e8e31749 | |||
34813e9b04 | |||
373cf61abb | |||
4e8fbc4b49 | |||
516ac1d4d5 | |||
318f63eb34 | |||
16503d7532 | |||
0fa93383b7 | |||
0a827aa7bf | |||
165c68e28e | |||
b32b1ca642 | |||
9479bc8486 | |||
10192dfc71 | |||
c69a3b6ef6 | |||
2ed39ebb7a | |||
96671bbb24 | |||
0ca1992151 | |||
df2b0c4e79 | |||
5d834486c9 | |||
f7373e97a4 | |||
b8bd8cd2ae | |||
c7637a84ad | |||
a7772c827b | |||
8e83398861 | |||
843ca9350a | |||
f47b2b6e13 | |||
4180a4a8a7 |
@ -22,8 +22,18 @@
|
||||
#undef __CUDACC__
|
||||
#undef __CUDA_ARCH__
|
||||
#define __NVCC__REDEFINE__
|
||||
#endif
|
||||
|
||||
/* SYCL save and restore compile environment*/
|
||||
#ifdef __SYCL_DEVICE_ONLY__
|
||||
#pragma push
|
||||
#pragma push_macro("__SYCL_DEVICE_ONLY__")
|
||||
#undef __SYCL_DEVICE_ONLY__
|
||||
#undef EIGEN_USE_SYCL
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
|
||||
#include <Grid/Eigen/Dense>
|
||||
#include <Grid/Eigen/unsupported/CXX11/Tensor>
|
||||
|
||||
@ -35,7 +45,14 @@
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
/*SYCL restore*/
|
||||
#ifdef __SYCL__REDEFINE__
|
||||
#pragma pop_macro("__SYCL_DEVICE_ONLY__")
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -39,14 +39,18 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/algorithms/approx/Remez.h>
|
||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||
#include <Grid/algorithms/approx/Forecast.h>
|
||||
#include <Grid/algorithms/approx/RemezGeneral.h>
|
||||
#include <Grid/algorithms/approx/ZMobius.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/Deflation.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/BiCGSTAB.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
|
||||
#include <Grid/algorithms/iterative/MinimalResidual.h>
|
||||
|
@ -541,17 +541,14 @@ public:
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
||||
|
||||
RealD M (const CoarseVector &in, CoarseVector &out){
|
||||
|
||||
void M (const CoarseVector &in, CoarseVector &out)
|
||||
{
|
||||
conformable(_grid,in.Grid());
|
||||
conformable(in.Grid(),out.Grid());
|
||||
|
||||
// RealD Nin = norm2(in);
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
|
||||
double comms_usec = -usecond();
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
comms_usec += usecond();
|
||||
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
@ -565,12 +562,7 @@ public:
|
||||
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||
|
||||
GridStopWatch ArithmeticTimer;
|
||||
int osites=Grid()->oSites();
|
||||
// double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint;
|
||||
// double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex);
|
||||
double usecs =-usecond();
|
||||
// assert(geom.npoint==9);
|
||||
|
||||
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
||||
int ss = sss/nbasis;
|
||||
@ -598,23 +590,9 @@ public:
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res,lane);
|
||||
});
|
||||
usecs +=usecond();
|
||||
|
||||
double nrm_usec=-usecond();
|
||||
RealD Nout= norm2(out);
|
||||
nrm_usec+=usecond();
|
||||
|
||||
/*
|
||||
std::cout << GridLogMessage << "\tNorm " << nrm_usec << " us" <<std::endl;
|
||||
std::cout << GridLogMessage << "\tHalo " << comms_usec << " us" <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << usecs << " us" <<std::endl;
|
||||
std::cout << GridLogMessage << "\t mflop/s " << flops/usecs<<std::endl;
|
||||
std::cout << GridLogMessage << "\t MB/s " << bytes/usecs<<std::endl;
|
||||
*/
|
||||
return Nout;
|
||||
};
|
||||
|
||||
RealD Mdag (const CoarseVector &in, CoarseVector &out)
|
||||
void Mdag (const CoarseVector &in, CoarseVector &out)
|
||||
{
|
||||
if(hermitian) {
|
||||
// corresponds to Petrov-Galerkin coarsening
|
||||
@ -625,7 +603,6 @@ public:
|
||||
G5C(tmp, in);
|
||||
M(tmp, out);
|
||||
G5C(out, out);
|
||||
return norm2(out);
|
||||
}
|
||||
};
|
||||
void MdirComms(const CoarseVector &in)
|
||||
@ -870,8 +847,6 @@ public:
|
||||
auto A_self = A[self_stencil].View();
|
||||
|
||||
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
||||
// if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });}
|
||||
// accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); });
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -43,7 +43,6 @@ NAMESPACE_BEGIN(Grid);
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class LinearOperatorBase {
|
||||
public:
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
||||
@ -94,7 +93,10 @@ public:
|
||||
_Mat.Mdag(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out,n1,n2);
|
||||
_Mat.MdagM(in,out);
|
||||
ComplexD dot = innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.MdagM(in,out);
|
||||
@ -131,17 +133,14 @@ public:
|
||||
assert(0);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out,n1,n2);
|
||||
out = out + _shift*in;
|
||||
|
||||
ComplexD dot;
|
||||
dot= innerProduct(in,out);
|
||||
HermOp(in,out);
|
||||
ComplexD dot = innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
_Mat.MdagM(in,out);
|
||||
out = out + _shift*in;
|
||||
}
|
||||
};
|
||||
|
||||
@ -170,7 +169,7 @@ public:
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.M(in,out);
|
||||
HermOp(in,out);
|
||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
@ -208,212 +207,305 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Even Odd Schur decomp operators; there are several
|
||||
// ways to introduce the even odd checkerboarding
|
||||
//////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////
|
||||
// Even Odd Schur decomp operators; there are several
|
||||
// ways to introduce the even odd checkerboarding
|
||||
//////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class SchurOperatorBase : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
virtual RealD Mpc (const Field &in, Field &out) =0;
|
||||
virtual RealD MpcDag (const Field &in, Field &out) =0;
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = in.Checkerboard();
|
||||
ni=Mpc(in,tmp);
|
||||
no=MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MpcDagMpc(in,out,n1,n2);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
MpcDag(in,out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
assert(0);
|
||||
};
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
public:
|
||||
Matrix &_Mat;
|
||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = !in.Checkerboard();
|
||||
//std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
|
||||
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
//std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
|
||||
_Mat.MooeeDag(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
_Mat.Meooe(tmp,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInvDag(in,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInv(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
_Mat.MeooeDag(tmp,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
||||
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
||||
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Staggered use
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
double tMpc;
|
||||
double tIP;
|
||||
double tMeo;
|
||||
double taxpby_norm;
|
||||
uint64_t ncall;
|
||||
public:
|
||||
void Report(void)
|
||||
{
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
|
||||
}
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
tMpc=0;
|
||||
tIP =0;
|
||||
tMeo=0;
|
||||
taxpby_norm=0;
|
||||
ncall=0;
|
||||
}
|
||||
template<class Field>
|
||||
class SchurOperatorBase : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
virtual void Mpc (const Field &in, Field &out) =0;
|
||||
virtual void MpcDag (const Field &in, Field &out) =0;
|
||||
virtual void MpcDagMpc(const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = in.Checkerboard();
|
||||
Mpc(in,tmp);
|
||||
MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
n2 = Mpc(in,out);
|
||||
tMpc+=usecond();
|
||||
tIP-=usecond();
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
tIP+=usecond();
|
||||
n1 = real(dot);
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MpcDagMpc(in,out);
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMpc+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MpcDagMpc(in,out);
|
||||
}
|
||||
virtual RealD Mpc (const Field &in, Field &out)
|
||||
{
|
||||
void Op (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
MpcDag(in,out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
assert(0);
|
||||
};
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
public:
|
||||
Matrix &_Mat;
|
||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = !in.Checkerboard();
|
||||
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.Mooee(in,out);
|
||||
axpy(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
_Mat.MooeeDag(in,out);
|
||||
axpy(out,-1.0,tmp,out);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
_Mat.Meooe(tmp,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInvDag(in,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInv(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
_Mat.MeooeDag(tmp,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class NonHermitianSchurOperatorBase : public LinearOperatorBase<Field>
|
||||
{
|
||||
public:
|
||||
virtual void Mpc (const Field& in, Field& out) = 0;
|
||||
virtual void MpcDag (const Field& in, Field& out) = 0;
|
||||
virtual void MpcDagMpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = in.Checkerboard();
|
||||
Mpc(in,tmp);
|
||||
MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) {
|
||||
assert(0);
|
||||
}
|
||||
virtual void HermOp(const Field& in, Field& out) {
|
||||
assert(0);
|
||||
}
|
||||
void Op(const Field& in, Field& out) {
|
||||
Mpc(in, out);
|
||||
}
|
||||
void AdjOp(const Field& in, Field& out) {
|
||||
MpcDag(in, out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag(const Field& in, Field& out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir(const Field& in, Field& out, int dir, int disp) {
|
||||
assert(0);
|
||||
}
|
||||
void OpDirAll(const Field& in, std::vector<Field>& out){
|
||||
assert(0);
|
||||
};
|
||||
};
|
||||
|
||||
template<class Matrix, class Field>
|
||||
class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase<Field>
|
||||
{
|
||||
public:
|
||||
Matrix& _Mat;
|
||||
NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = !in.Checkerboard();
|
||||
|
||||
_Mat.Meooe(in, tmp);
|
||||
_Mat.MooeeInv(tmp, out);
|
||||
_Mat.Meooe(out, tmp);
|
||||
|
||||
_Mat.Mooee(in, out);
|
||||
|
||||
axpy(out, -1.0, tmp, out);
|
||||
}
|
||||
virtual void MpcDag(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in, tmp);
|
||||
_Mat.MooeeInvDag(tmp, out);
|
||||
_Mat.MeooeDag(out, tmp);
|
||||
|
||||
_Mat.MooeeDag(in, out);
|
||||
|
||||
axpy(out, -1.0, tmp, out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Field>
|
||||
{
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
|
||||
public:
|
||||
NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.Meooe(in, out);
|
||||
_Mat.MooeeInv(out, tmp);
|
||||
_Mat.Meooe(tmp, out);
|
||||
_Mat.MooeeInv(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
virtual void MpcDag(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInvDag(in, out);
|
||||
_Mat.MeooeDag(out, tmp);
|
||||
_Mat.MooeeInvDag(tmp, out);
|
||||
_Mat.MeooeDag(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Matrix, class Field>
|
||||
class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Field>
|
||||
{
|
||||
protected:
|
||||
Matrix& _Mat;
|
||||
|
||||
public:
|
||||
NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInv(in, out);
|
||||
_Mat.Meooe(out, tmp);
|
||||
_Mat.MooeeInv(tmp, out);
|
||||
_Mat.Meooe(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
virtual void MpcDag(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in, out);
|
||||
_Mat.MooeeInvDag(out, tmp);
|
||||
_Mat.MeooeDag(tmp, out);
|
||||
_Mat.MooeeInvDag(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
||||
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
||||
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Staggered use
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
public:
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
Mpc(in,out);
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
n1 = real(dot);
|
||||
n2 =0.0;
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
// _Mat.Meooe(in,out);
|
||||
// _Mat.Meooe(out,tmp);
|
||||
// axpby(out,-1.0,mass*mass,tmp,in);
|
||||
}
|
||||
virtual void Mpc (const Field &in, Field &out)
|
||||
{
|
||||
Field tmp(in.Grid());
|
||||
Field tmp2(in.Grid());
|
||||
|
||||
// _Mat.Mooee(in,out);
|
||||
// _Mat.Mooee(out,tmp);
|
||||
|
||||
// std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
_Mat.Mooee(out,tmp);
|
||||
// std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
||||
|
||||
tMeo-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMeo+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
return nn;
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
return Mpc(in,out);
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
assert(0);// Never need with staggered
|
||||
@ -421,7 +513,6 @@ public:
|
||||
};
|
||||
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for functions of operators
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
@ -38,16 +38,12 @@ template<class Field> class SparseMatrixBase {
|
||||
public:
|
||||
virtual GridBase *Grid(void) =0;
|
||||
// Full checkerboar operations
|
||||
virtual RealD M (const Field &in, Field &out)=0;
|
||||
virtual RealD Mdag (const Field &in, Field &out)=0;
|
||||
virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
Field tmp (in.Grid());
|
||||
ni=M(in,tmp);
|
||||
no=Mdag(tmp,out);
|
||||
}
|
||||
virtual void M (const Field &in, Field &out)=0;
|
||||
virtual void Mdag (const Field &in, Field &out)=0;
|
||||
virtual void MdagM(const Field &in, Field &out) {
|
||||
RealD ni, no;
|
||||
MdagM(in,out,ni,no);
|
||||
Field tmp (in.Grid());
|
||||
M(in,tmp);
|
||||
Mdag(tmp,out);
|
||||
}
|
||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||
|
@ -234,10 +234,8 @@ public:
|
||||
|
||||
GridBase *grid=in.Grid();
|
||||
|
||||
// std::cout << "Chevyshef(): in.Grid()="<<in.Grid()<<std::endl;
|
||||
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
|
||||
|
||||
int vol=grid->gSites();
|
||||
typedef typename Field::vector_type vector_type;
|
||||
|
||||
Field T0(grid); T0 = in;
|
||||
Field T1(grid);
|
||||
@ -258,14 +256,28 @@ public:
|
||||
// out = ()*T0 + Coeffs[1]*T1;
|
||||
axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
|
||||
for(int n=2;n<order;n++){
|
||||
|
||||
|
||||
Linop.HermOp(*Tn,y);
|
||||
// y=xscale*y+mscale*(*Tn);
|
||||
// *Tnp=2.0*y-(*Tnm);
|
||||
// out=out+Coeffs[n]* (*Tnp);
|
||||
#if 0
|
||||
auto y_v = y.View();
|
||||
auto Tn_v = Tn->View();
|
||||
auto Tnp_v = Tnp->View();
|
||||
auto Tnm_v = Tnm->View();
|
||||
constexpr int Nsimd = vector_type::Nsimd();
|
||||
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
|
||||
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
|
||||
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
|
||||
});
|
||||
if ( Coeffs[n] != 0.0) {
|
||||
axpy(out,Coeffs[n],*Tnp,out);
|
||||
}
|
||||
#else
|
||||
axpby(y,xscale,mscale,y,(*Tn));
|
||||
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
|
||||
axpy(out,Coeffs[n],*Tnp,out);
|
||||
if ( Coeffs[n] != 0.0) {
|
||||
axpy(out,Coeffs[n],*Tnp,out);
|
||||
}
|
||||
#endif
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
|
473
Grid/algorithms/approx/RemezGeneral.cc
Normal file
473
Grid/algorithms/approx/RemezGeneral.cc
Normal file
@ -0,0 +1,473 @@
|
||||
#include<math.h>
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string>
|
||||
#include<iostream>
|
||||
#include<iomanip>
|
||||
#include<cassert>
|
||||
|
||||
#include<Grid/algorithms/approx/RemezGeneral.h>
|
||||
|
||||
|
||||
// Constructor
|
||||
AlgRemezGeneral::AlgRemezGeneral(double lower, double upper, long precision,
|
||||
bigfloat (*f)(bigfloat x, void *data), void *data): f(f),
|
||||
data(data),
|
||||
prec(precision),
|
||||
apstrt(lower), apend(upper), apwidt(upper - lower),
|
||||
n(0), d(0), pow_n(0), pow_d(0)
|
||||
{
|
||||
bigfloat::setDefaultPrecision(prec);
|
||||
|
||||
std::cout<<"Approximation bounds are ["<<apstrt<<","<<apend<<"]\n";
|
||||
std::cout<<"Precision of arithmetic is "<<precision<<std::endl;
|
||||
}
|
||||
|
||||
//Determine the properties of the numerator and denominator polynomials
|
||||
void AlgRemezGeneral::setupPolyProperties(int num_degree, int den_degree, PolyType num_type_in, PolyType den_type_in){
|
||||
pow_n = num_degree;
|
||||
pow_d = den_degree;
|
||||
|
||||
if(pow_n % 2 == 0 && num_type_in == PolyType::Odd) assert(0);
|
||||
if(pow_n % 2 == 1 && num_type_in == PolyType::Even) assert(0);
|
||||
|
||||
if(pow_d % 2 == 0 && den_type_in == PolyType::Odd) assert(0);
|
||||
if(pow_d % 2 == 1 && den_type_in == PolyType::Even) assert(0);
|
||||
|
||||
num_type = num_type_in;
|
||||
den_type = den_type_in;
|
||||
|
||||
num_pows.resize(pow_n+1);
|
||||
den_pows.resize(pow_d+1);
|
||||
|
||||
int n_in = 0;
|
||||
bool odd = num_type == PolyType::Full || num_type == PolyType::Odd;
|
||||
bool even = num_type == PolyType::Full || num_type == PolyType::Even;
|
||||
for(int i=0;i<=pow_n;i++){
|
||||
num_pows[i] = -1;
|
||||
if(i % 2 == 0 && even) num_pows[i] = n_in++;
|
||||
if(i % 2 == 1 && odd) num_pows[i] = n_in++;
|
||||
}
|
||||
|
||||
std::cout << n_in << " terms in numerator" << std::endl;
|
||||
--n_in; //power is 1 less than the number of terms, eg pow=1 a x^1 + b x^0
|
||||
|
||||
int d_in = 0;
|
||||
odd = den_type == PolyType::Full || den_type == PolyType::Odd;
|
||||
even = den_type == PolyType::Full || den_type == PolyType::Even;
|
||||
for(int i=0;i<=pow_d;i++){
|
||||
den_pows[i] = -1;
|
||||
if(i % 2 == 0 && even) den_pows[i] = d_in++;
|
||||
if(i % 2 == 1 && odd) den_pows[i] = d_in++;
|
||||
}
|
||||
|
||||
std::cout << d_in << " terms in denominator" << std::endl;
|
||||
--d_in;
|
||||
|
||||
n = n_in;
|
||||
d = d_in;
|
||||
}
|
||||
|
||||
//Setup algorithm
|
||||
void AlgRemezGeneral::reinitializeAlgorithm(){
|
||||
spread = 1.0e37;
|
||||
iter = 0;
|
||||
|
||||
neq = n + d + 1; //not +2 because highest-power term in denominator is fixed to 1
|
||||
|
||||
param.resize(neq);
|
||||
yy.resize(neq+1);
|
||||
|
||||
//Initialize linear equation temporaries
|
||||
A.resize(neq*neq);
|
||||
B.resize(neq);
|
||||
IPS.resize(neq);
|
||||
|
||||
//Initialize maximum and minimum errors
|
||||
xx.resize(neq+2);
|
||||
mm.resize(neq+1);
|
||||
initialGuess();
|
||||
|
||||
//Initialize search steps
|
||||
step.resize(neq+1);
|
||||
stpini();
|
||||
}
|
||||
|
||||
double AlgRemezGeneral::generateApprox(const int num_degree, const int den_degree,
|
||||
const PolyType num_type_in, const PolyType den_type_in,
|
||||
const double _tolerance, const int report_freq){
|
||||
//Setup the properties of the polynomial
|
||||
setupPolyProperties(num_degree, den_degree, num_type_in, den_type_in);
|
||||
|
||||
//Setup the algorithm
|
||||
reinitializeAlgorithm();
|
||||
|
||||
bigfloat tolerance = _tolerance;
|
||||
|
||||
//Iterate until convergance
|
||||
while (spread > tolerance) {
|
||||
if (iter++ % report_freq==0)
|
||||
std::cout<<"Iteration " <<iter-1<<" spread "<<(double)spread<<" delta "<<(double)delta << std::endl;
|
||||
|
||||
equations();
|
||||
if (delta < tolerance) {
|
||||
std::cout<<"Iteration " << iter-1 << " delta too small (" << delta << "<" << tolerance << "), try increasing precision\n";
|
||||
assert(0);
|
||||
};
|
||||
assert( delta>= tolerance );
|
||||
|
||||
search();
|
||||
}
|
||||
|
||||
int sign;
|
||||
double error = (double)getErr(mm[0],&sign);
|
||||
std::cout<<"Converged at "<<iter<<" iterations; error = "<<error<<std::endl;
|
||||
|
||||
// Return the maximum error in the approximation
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Initial values of maximal and minimal errors
|
||||
void AlgRemezGeneral::initialGuess(){
|
||||
// Supply initial guesses for solution points
|
||||
long ncheb = neq; // Degree of Chebyshev error estimate
|
||||
|
||||
// Find ncheb+1 extrema of Chebyshev polynomial
|
||||
bigfloat a = ncheb;
|
||||
bigfloat r;
|
||||
|
||||
mm[0] = apstrt;
|
||||
for (long i = 1; i < ncheb; i++) {
|
||||
r = 0.5 * (1 - cos((M_PI * i)/(double) a));
|
||||
//r *= sqrt_bf(r);
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
mm[i] = apstrt + r * apwidt;
|
||||
}
|
||||
mm[ncheb] = apend;
|
||||
|
||||
a = 2.0 * ncheb;
|
||||
for (long i = 0; i <= ncheb; i++) {
|
||||
r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
|
||||
//r *= sqrt_bf(r); // Squeeze to low end of interval
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
xx[i] = apstrt + r * apwidt;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise step sizes
|
||||
void AlgRemezGeneral::stpini(){
|
||||
xx[neq+1] = apend;
|
||||
delta = 0.25;
|
||||
step[0] = xx[0] - apstrt;
|
||||
for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
|
||||
step[neq] = step[neq-1];
|
||||
}
|
||||
|
||||
// Search for error maxima and minima
|
||||
void AlgRemezGeneral::search(){
|
||||
bigfloat a, q, xm, ym, xn, yn, xx1;
|
||||
int emsign, ensign, steps;
|
||||
|
||||
int meq = neq + 1;
|
||||
|
||||
bigfloat eclose = 1.0e30;
|
||||
bigfloat farther = 0l;
|
||||
|
||||
bigfloat xx0 = apstrt;
|
||||
|
||||
for (int i = 0; i < meq; i++) {
|
||||
steps = 0;
|
||||
xx1 = xx[i]; // Next zero
|
||||
if (i == meq-1) xx1 = apend;
|
||||
xm = mm[i];
|
||||
ym = getErr(xm,&emsign);
|
||||
q = step[i];
|
||||
xn = xm + q;
|
||||
if (xn < xx0 || xn >= xx1) { // Cannot skip over adjacent boundaries
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
} else {
|
||||
yn = getErr(xn,&ensign);
|
||||
if (yn < ym) {
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
}
|
||||
}
|
||||
|
||||
while(yn >= ym) { // March until error becomes smaller.
|
||||
if (++steps > 10)
|
||||
break;
|
||||
|
||||
ym = yn;
|
||||
xm = xn;
|
||||
emsign = ensign;
|
||||
a = xm + q;
|
||||
if (a == xm || a <= xx0 || a >= xx1)
|
||||
break;// Must not skip over the zeros either side.
|
||||
|
||||
xn = a;
|
||||
yn = getErr(xn,&ensign);
|
||||
}
|
||||
|
||||
mm[i] = xm; // Position of maximum
|
||||
yy[i] = ym; // Value of maximum
|
||||
|
||||
if (eclose > ym) eclose = ym;
|
||||
if (farther < ym) farther = ym;
|
||||
|
||||
xx0 = xx1; // Walk to next zero.
|
||||
} // end of search loop
|
||||
|
||||
q = (farther - eclose); // Decrease step size if error spread increased
|
||||
|
||||
if (eclose != 0.0) q /= eclose; // Relative error spread
|
||||
|
||||
if (q >= spread)
|
||||
delta *= 0.5; // Spread is increasing; decrease step size
|
||||
|
||||
spread = q;
|
||||
|
||||
for (int i = 0; i < neq; i++) {
|
||||
q = yy[i+1];
|
||||
if (q != 0.0) q = yy[i] / q - (bigfloat)1l;
|
||||
else q = 0.0625;
|
||||
if (q > (bigfloat)0.25) q = 0.25;
|
||||
q *= mm[i+1] - mm[i];
|
||||
step[i] = q * delta;
|
||||
}
|
||||
step[neq] = step[neq-1];
|
||||
|
||||
for (int i = 0; i < neq; i++) { // Insert new locations for the zeros.
|
||||
xm = xx[i] - step[i];
|
||||
|
||||
if (xm <= apstrt)
|
||||
continue;
|
||||
|
||||
if (xm >= apend)
|
||||
continue;
|
||||
|
||||
if (xm <= mm[i])
|
||||
xm = (bigfloat)0.5 * (mm[i] + xx[i]);
|
||||
|
||||
if (xm >= mm[i+1])
|
||||
xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
|
||||
|
||||
xx[i] = xm;
|
||||
}
|
||||
}
|
||||
|
||||
// Solve the equations
|
||||
void AlgRemezGeneral::equations(){
|
||||
bigfloat x, y, z;
|
||||
bigfloat *aa;
|
||||
|
||||
for (int i = 0; i < neq; i++) { // set up the equations for solution by simq()
|
||||
int ip = neq * i; // offset to 1st element of this row of matrix
|
||||
x = xx[i]; // the guess for this row
|
||||
y = func(x); // right-hand-side vector
|
||||
|
||||
z = (bigfloat)1l;
|
||||
aa = A.data()+ip;
|
||||
int t = 0;
|
||||
for (int j = 0; j <= pow_n; j++) {
|
||||
if(num_pows[j] != -1){ *aa++ = z; t++; }
|
||||
z *= x;
|
||||
}
|
||||
assert(t == n+1);
|
||||
|
||||
z = (bigfloat)1l;
|
||||
t = 0;
|
||||
for (int j = 0; j < pow_d; j++) {
|
||||
if(den_pows[j] != -1){ *aa++ = -y * z; t++; }
|
||||
z *= x;
|
||||
}
|
||||
assert(t == d);
|
||||
|
||||
B[i] = y * z; // Right hand side vector
|
||||
}
|
||||
|
||||
// Solve the simultaneous linear equations.
|
||||
if (simq()){
|
||||
std::cout<<"simq failed\n";
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients
|
||||
// from the solution vector param
|
||||
bigfloat AlgRemezGeneral::approx(const bigfloat x) const{
|
||||
// Work backwards toward the constant term.
|
||||
int c = n;
|
||||
bigfloat yn = param[c--]; // Highest order numerator coefficient
|
||||
for (int i = pow_n-1; i >= 0; i--) yn = x * yn + (num_pows[i] != -1 ? param[c--] : bigfloat(0l));
|
||||
|
||||
c = n+d;
|
||||
bigfloat yd = 1l; //Highest degree coefficient is 1.0
|
||||
for (int i = pow_d-1; i >= 0; i--) yd = x * yd + (den_pows[i] != -1 ? param[c--] : bigfloat(0l));
|
||||
|
||||
return(yn/yd);
|
||||
}
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat AlgRemezGeneral::getErr(bigfloat x, int *sign) const{
|
||||
bigfloat f = func(x);
|
||||
bigfloat e = approx(x) - f;
|
||||
if (f != 0) e /= f;
|
||||
if (e < (bigfloat)0.0) {
|
||||
*sign = -1;
|
||||
e = -e;
|
||||
}
|
||||
else *sign = 1;
|
||||
|
||||
return(e);
|
||||
}
|
||||
|
||||
// Solve the system AX=B
|
||||
int AlgRemezGeneral::simq(){
|
||||
|
||||
int ip, ipj, ipk, ipn;
|
||||
int idxpiv;
|
||||
int kp, kp1, kpk, kpn;
|
||||
int nip, nkp;
|
||||
bigfloat em, q, rownrm, big, size, pivot, sum;
|
||||
bigfloat *aa;
|
||||
bigfloat *X = param.data();
|
||||
|
||||
int n = neq;
|
||||
int nm1 = n - 1;
|
||||
// Initialize IPS and X
|
||||
|
||||
int ij = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
IPS[i] = i;
|
||||
rownrm = 0.0;
|
||||
for(int j = 0; j < n; j++) {
|
||||
q = abs_bf(A[ij]);
|
||||
if(rownrm < q) rownrm = q;
|
||||
++ij;
|
||||
}
|
||||
if (rownrm == (bigfloat)0l) {
|
||||
std::cout<<"simq rownrm=0\n";
|
||||
return(1);
|
||||
}
|
||||
X[i] = (bigfloat)1.0 / rownrm;
|
||||
}
|
||||
|
||||
for (int k = 0; k < nm1; k++) {
|
||||
big = 0.0;
|
||||
idxpiv = 0;
|
||||
|
||||
for (int i = k; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
size = abs_bf(A[ipk]) * X[ip];
|
||||
if (size > big) {
|
||||
big = size;
|
||||
idxpiv = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (big == (bigfloat)0l) {
|
||||
std::cout<<"simq big=0\n";
|
||||
return(2);
|
||||
}
|
||||
if (idxpiv != k) {
|
||||
int j = IPS[k];
|
||||
IPS[k] = IPS[idxpiv];
|
||||
IPS[idxpiv] = j;
|
||||
}
|
||||
kp = IPS[k];
|
||||
kpk = n*kp + k;
|
||||
pivot = A[kpk];
|
||||
kp1 = k+1;
|
||||
for (int i = kp1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
em = -A[ipk] / pivot;
|
||||
A[ipk] = -em;
|
||||
nip = n*ip;
|
||||
nkp = n*kp;
|
||||
aa = A.data()+nkp+kp1;
|
||||
for (int j = kp1; j < n; j++) {
|
||||
ipj = nip + j;
|
||||
A[ipj] = A[ipj] + em * *aa++;
|
||||
}
|
||||
}
|
||||
}
|
||||
kpn = n * IPS[n-1] + n - 1; // last element of IPS[n] th row
|
||||
if (A[kpn] == (bigfloat)0l) {
|
||||
std::cout<<"simq A[kpn]=0\n";
|
||||
return(3);
|
||||
}
|
||||
|
||||
|
||||
ip = IPS[0];
|
||||
X[0] = B[ip];
|
||||
for (int i = 1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipj = n * ip;
|
||||
sum = 0.0;
|
||||
for (int j = 0; j < i; j++) {
|
||||
sum += A[ipj] * X[j];
|
||||
++ipj;
|
||||
}
|
||||
X[i] = B[ip] - sum;
|
||||
}
|
||||
|
||||
ipn = n * IPS[n-1] + n - 1;
|
||||
X[n-1] = X[n-1] / A[ipn];
|
||||
|
||||
for (int iback = 1; iback < n; iback++) {
|
||||
//i goes (n-1),...,1
|
||||
int i = nm1 - iback;
|
||||
ip = IPS[i];
|
||||
nip = n*ip;
|
||||
sum = 0.0;
|
||||
aa = A.data()+nip+i+1;
|
||||
for (int j= i + 1; j < n; j++)
|
||||
sum += *aa++ * X[j];
|
||||
X[i] = (X[i] - sum) / A[nip+i];
|
||||
}
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
void AlgRemezGeneral::csv(std::ostream & os) const{
|
||||
os << "Numerator" << std::endl;
|
||||
for(int i=0;i<=pow_n;i++){
|
||||
os << getCoeffNum(i) << "*x^" << i;
|
||||
if(i!=pow_n) os << " + ";
|
||||
}
|
||||
os << std::endl;
|
||||
|
||||
os << "Denominator" << std::endl;
|
||||
for(int i=0;i<=pow_d;i++){
|
||||
os << getCoeffDen(i) << "*x^" << i;
|
||||
if(i!=pow_d) os << " + ";
|
||||
}
|
||||
os << std::endl;
|
||||
|
||||
//For a true minimax solution the errors should all be equal and the signs should oscillate +-+-+- etc
|
||||
int sign;
|
||||
os << "Errors at maxima: coordinate, error, (sign)" << std::endl;
|
||||
for(int i=0;i<neq+1;i++){
|
||||
os << mm[i] << " " << getErr(mm[i],&sign) << " (" << sign << ")" << std::endl;
|
||||
}
|
||||
|
||||
os << "Scan over range:" << std::endl;
|
||||
int npt = 60;
|
||||
bigfloat dlt = (apend - apstrt)/bigfloat(npt-1);
|
||||
|
||||
for (bigfloat x=apstrt; x<=apend; x = x + dlt) {
|
||||
double f = evaluateFunc(x);
|
||||
double r = evaluateApprox(x);
|
||||
os<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
170
Grid/algorithms/approx/RemezGeneral.h
Normal file
170
Grid/algorithms/approx/RemezGeneral.h
Normal file
@ -0,0 +1,170 @@
|
||||
/*
|
||||
C.Kelly Jan 2020 based on implementation by M. Clark May 2005
|
||||
|
||||
AlgRemezGeneral is an implementation of the Remez algorithm for approximating an arbitrary function by a rational polynomial
|
||||
It includes optional restriction to odd/even polynomials for the numerator and/or denominator
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_ALG_REMEZ_GENERAL_H
|
||||
#define INCLUDED_ALG_REMEZ_GENERAL_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <Grid/GridStd.h>
|
||||
|
||||
#ifdef HAVE_LIBGMP
|
||||
#include "bigfloat.h"
|
||||
#else
|
||||
#include "bigfloat_double.h"
|
||||
#endif
|
||||
|
||||
|
||||
class AlgRemezGeneral{
|
||||
public:
|
||||
enum PolyType { Even, Odd, Full };
|
||||
|
||||
private:
|
||||
|
||||
// In GSL-style, pass the function as a function pointer. Any data required to evaluate the function is passed in as a void pointer
|
||||
bigfloat (*f)(bigfloat x, void *data);
|
||||
void *data;
|
||||
|
||||
// The approximation parameters
|
||||
std::vector<bigfloat> param;
|
||||
bigfloat norm;
|
||||
|
||||
// The number of non-zero terms in the numerator and denominator
|
||||
int n, d;
|
||||
// The numerator and denominator degree (i.e. the largest power)
|
||||
int pow_n, pow_d;
|
||||
|
||||
// Specify if the numerator and/or denominator are odd/even polynomials
|
||||
PolyType num_type;
|
||||
PolyType den_type;
|
||||
std::vector<int> num_pows; //contains the mapping, with -1 if not present
|
||||
std::vector<int> den_pows;
|
||||
|
||||
// The bounds of the approximation
|
||||
bigfloat apstrt, apwidt, apend;
|
||||
|
||||
// Variables used to calculate the approximation
|
||||
int nd1, iter;
|
||||
std::vector<bigfloat> xx;
|
||||
std::vector<bigfloat> mm;
|
||||
std::vector<bigfloat> step;
|
||||
|
||||
bigfloat delta, spread;
|
||||
|
||||
// Variables used in search
|
||||
std::vector<bigfloat> yy;
|
||||
|
||||
// Variables used in solving linear equations
|
||||
std::vector<bigfloat> A;
|
||||
std::vector<bigfloat> B;
|
||||
std::vector<int> IPS;
|
||||
|
||||
// The number of equations we must solve at each iteration (n+d+1)
|
||||
int neq;
|
||||
|
||||
// The precision of the GNU MP library
|
||||
long prec;
|
||||
|
||||
// Initialize member variables associated with the polynomial's properties
|
||||
void setupPolyProperties(int num_degree, int den_degree, PolyType num_type_in, PolyType den_type_in);
|
||||
|
||||
// Initial values of maximal and minmal errors
|
||||
void initialGuess();
|
||||
|
||||
// Initialise step sizes
|
||||
void stpini();
|
||||
|
||||
// Initialize the algorithm
|
||||
void reinitializeAlgorithm();
|
||||
|
||||
// Solve the equations
|
||||
void equations();
|
||||
|
||||
// Search for error maxima and minima
|
||||
void search();
|
||||
|
||||
// Calculate function required for the approximation
|
||||
inline bigfloat func(bigfloat x) const{
|
||||
return f(x, data);
|
||||
}
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat getErr(bigfloat x, int *sign) const;
|
||||
|
||||
// Solve the system AX=B where X = param
|
||||
int simq();
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the solution vector param
|
||||
bigfloat approx(bigfloat x) const;
|
||||
|
||||
public:
|
||||
|
||||
AlgRemezGeneral(double lower, double upper, long prec,
|
||||
bigfloat (*f)(bigfloat x, void *data), void *data);
|
||||
|
||||
inline int getDegree(void) const{
|
||||
assert(n==d);
|
||||
return n;
|
||||
}
|
||||
// Reset the bounds of the approximation
|
||||
inline void setBounds(double lower, double upper) {
|
||||
apstrt = lower;
|
||||
apend = upper;
|
||||
apwidt = apend - apstrt;
|
||||
}
|
||||
|
||||
// Get the bounds of the approximation
|
||||
inline void getBounds(double &lower, double &upper) const{
|
||||
lower=(double)apstrt;
|
||||
upper=(double)apend;
|
||||
}
|
||||
|
||||
// Run the algorithm to generate the rational approximation
|
||||
double generateApprox(int num_degree, int den_degree,
|
||||
PolyType num_type, PolyType den_type,
|
||||
const double tolerance = 1e-15, const int report_freq = 1000);
|
||||
|
||||
inline double generateApprox(int num_degree, int den_degree,
|
||||
const double tolerance = 1e-15, const int report_freq = 1000){
|
||||
return generateApprox(num_degree, den_degree, Full, Full, tolerance, report_freq);
|
||||
}
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the
|
||||
// solution vector param
|
||||
inline double evaluateApprox(double x) const{
|
||||
return (double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
// Evaluate the rational form Q(x)/P(x) using coefficients from the solution vector param
|
||||
inline double evaluateInverseApprox(double x) const{
|
||||
return 1.0/(double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
// Calculate function required for the approximation
|
||||
inline double evaluateFunc(double x) const{
|
||||
return (double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
// Calculate inverse function required for the approximation
|
||||
inline double evaluateInverseFunc(double x) const{
|
||||
return 1.0/(double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
// Dump csv of function, approx and error
|
||||
void csv(std::ostream &os = std::cout) const;
|
||||
|
||||
// Get the coefficient of the term x^i in the numerator
|
||||
inline double getCoeffNum(const int i) const{
|
||||
return num_pows[i] == -1 ? 0. : double(param[num_pows[i]]);
|
||||
}
|
||||
// Get the coefficient of the term x^i in the denominator
|
||||
inline double getCoeffDen(const int i) const{
|
||||
if(i == pow_d) return 1.0;
|
||||
else return den_pows[i] == -1 ? 0. : double(param[den_pows[i]+n+1]);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
183
Grid/algorithms/approx/ZMobius.cc
Normal file
183
Grid/algorithms/approx/ZMobius.cc
Normal file
@ -0,0 +1,183 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/ZMobius.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/algorithms/approx/ZMobius.h>
|
||||
#include <Grid/algorithms/approx/RemezGeneral.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
NAMESPACE_BEGIN(Approx);
|
||||
|
||||
//Compute the tanh approximation
|
||||
inline double epsilonMobius(const double x, const std::vector<ComplexD> &w){
|
||||
int Ls = w.size();
|
||||
|
||||
ComplexD fxp = 1., fmp = 1.;
|
||||
for(int i=0;i<Ls;i++){
|
||||
fxp = fxp * ( w[i] + x );
|
||||
fmp = fmp * ( w[i] - x );
|
||||
}
|
||||
return ((fxp - fmp)/(fxp + fmp)).real();
|
||||
}
|
||||
inline double epsilonMobius(const double x, const std::vector<RealD> &w){
|
||||
int Ls = w.size();
|
||||
|
||||
double fxp = 1., fmp = 1.;
|
||||
for(int i=0;i<Ls;i++){
|
||||
fxp = fxp * ( w[i] + x );
|
||||
fmp = fmp * ( w[i] - x );
|
||||
}
|
||||
return (fxp - fmp)/(fxp + fmp);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//Compute the tanh approximation in a form suitable for the Remez
|
||||
bigfloat epsilonMobius(bigfloat x, void* data){
|
||||
const std::vector<RealD> &omega = *( (std::vector<RealD> const*)data );
|
||||
bigfloat fxp(1.0);
|
||||
bigfloat fmp(1.0);
|
||||
|
||||
for(int i=0;i<omega.size();i++){
|
||||
fxp = fxp * ( bigfloat(omega[i]) + x);
|
||||
fmp = fmp * ( bigfloat(omega[i]) - x);
|
||||
}
|
||||
return (fxp - fmp)/(fxp + fmp);
|
||||
}
|
||||
|
||||
//Compute the Zmobius Omega parameters suitable for eigenvalue range -lambda_bound <= lambda <= lambda_bound
|
||||
//Note omega_i = 1/(b_i + c_i) where b_i and c_i are the Mobius parameters
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out,
|
||||
const std::vector<RealD> &omega_in, const int Ls_in,
|
||||
const RealD lambda_bound){
|
||||
assert(omega_in.size() == Ls_in);
|
||||
omega_out.resize(Ls_out);
|
||||
|
||||
//Use the Remez algorithm to generate the appropriate rational polynomial
|
||||
//For odd polynomial, to satisfy Haar condition must take either positive or negative half of range (cf https://arxiv.org/pdf/0803.0439.pdf page 6)
|
||||
AlgRemezGeneral remez(0, lambda_bound, 64, &epsilonMobius, (void*)&omega_in);
|
||||
remez.generateApprox(Ls_out-1, Ls_out,AlgRemezGeneral::Odd, AlgRemezGeneral::Even, 1e-15, 100);
|
||||
remez.csv(std::cout);
|
||||
|
||||
//The rational approximation has the form [ f(x) - f(-x) ] / [ f(x) + f(-x) ] where f(x) = \Prod_{i=0}^{L_s-1} ( \omega_i + x )
|
||||
//cf https://academiccommons.columbia.edu/doi/10.7916/D8T72HD7 pg 102
|
||||
//omega_i are therefore the negative of the complex roots of f(x)
|
||||
|
||||
//We can find the roots by recognizing that the eigenvalues of a matrix A are the roots of the characteristic polynomial
|
||||
// \rho(\lambda) = det( A - \lambda I ) where I is the unit matrix
|
||||
//The matrix whose characteristic polynomial is an arbitrary monic polynomial a0 + a1 x + a2 x^2 + ... x^n is the companion matrix
|
||||
// A = | 0 1 0 0 0 .... 0 |
|
||||
// | 0 0 1 0 0 .... 0 |
|
||||
// | : : : : : : |
|
||||
// | 0 0 0 0 0 1
|
||||
// | -a0 -a1 -a2 ... ... -an|
|
||||
|
||||
|
||||
//Note the Remez defines the largest power to have unit coefficient
|
||||
std::vector<RealD> coeffs(Ls_out+1);
|
||||
for(int i=0;i<Ls_out+1;i+=2) coeffs[i] = coeffs[i] = remez.getCoeffDen(i); //even powers
|
||||
for(int i=1;i<Ls_out+1;i+=2) coeffs[i] = coeffs[i] = remez.getCoeffNum(i); //odd powers
|
||||
|
||||
std::vector<std::complex<RealD> > roots(Ls_out);
|
||||
|
||||
//Form the companion matrix
|
||||
Eigen::MatrixXd compn(Ls_out,Ls_out);
|
||||
for(int i=0;i<Ls_out-1;i++) compn(i,0) = 0.;
|
||||
compn(Ls_out - 1, 0) = -coeffs[0];
|
||||
|
||||
for(int j=1;j<Ls_out;j++){
|
||||
for(int i=0;i<Ls_out-1;i++) compn(i,j) = i == j-1 ? 1. : 0.;
|
||||
compn(Ls_out - 1, j) = -coeffs[j];
|
||||
}
|
||||
|
||||
//Eigensolve
|
||||
Eigen::EigenSolver<Eigen::MatrixXd> slv(compn, false);
|
||||
|
||||
const auto & ev = slv.eigenvalues();
|
||||
for(int i=0;i<Ls_out;i++)
|
||||
omega_out[i] = -ev(i);
|
||||
|
||||
//Sort ascending (smallest at start of vector!)
|
||||
std::sort(omega_out.begin(), omega_out.end(),
|
||||
[&](const ComplexD &a, const ComplexD &b){ return a.real() < b.real() || (a.real() == b.real() && a.imag() < b.imag()); });
|
||||
|
||||
//McGlynn thesis pg 122 suggest improved iteration counts if magnitude of omega diminishes towards the center of the 5th dimension
|
||||
std::vector<ComplexD> omega_tmp = omega_out;
|
||||
int s_low=0, s_high=Ls_out-1, ss=0;
|
||||
for(int s_from = Ls_out-1; s_from >= 0; s_from--){ //loop from largest omega
|
||||
int s_to;
|
||||
if(ss % 2 == 0){
|
||||
s_to = s_low++;
|
||||
}else{
|
||||
s_to = s_high--;
|
||||
}
|
||||
omega_out[s_to] = omega_tmp[s_from];
|
||||
++ss;
|
||||
}
|
||||
|
||||
std::cout << "Resulting omega_i:" << std::endl;
|
||||
for(int i=0;i<Ls_out;i++)
|
||||
std::cout << omega_out[i] << std::endl;
|
||||
|
||||
std::cout << "Test result matches the approximate polynomial found by the Remez" << std::endl;
|
||||
std::cout << "<x> <remez approx> <poly approx> <diff poly approx remez approx> <exact> <diff poly approx exact>\n";
|
||||
|
||||
int npt = 60;
|
||||
double dlt = lambda_bound/double(npt-1);
|
||||
|
||||
for (int i =0; i<npt; i++){
|
||||
double x = i*dlt;
|
||||
double r = remez.evaluateApprox(x);
|
||||
double p = epsilonMobius(x, omega_out);
|
||||
double e = epsilonMobius(x, omega_in);
|
||||
|
||||
std::cout << x<< " " << r << " " << p <<" " <<r-p << " " << e << " " << e-p << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//mobius_param = b+c with b-c=1
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound){
|
||||
std::vector<RealD> omega_in(Ls_in, 1./mobius_param);
|
||||
computeZmobiusOmega(omega_out, Ls_out, omega_in, Ls_in, lambda_bound);
|
||||
}
|
||||
|
||||
//ZMobius class takes gamma_i = (b+c) omega_i as its input, where b, c are factored out
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out,
|
||||
const RealD mobius_param_out, const int Ls_out,
|
||||
const RealD mobius_param_in, const int Ls_in,
|
||||
const RealD lambda_bound){
|
||||
computeZmobiusOmega(gamma_out, Ls_out, mobius_param_in, Ls_in, lambda_bound);
|
||||
for(int i=0;i<Ls_out;i++) gamma_out[i] = gamma_out[i] * mobius_param_out;
|
||||
}
|
||||
//Assumes mobius_param_out == mobius_param_in
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound){
|
||||
computeZmobiusGamma(gamma_out, mobius_param, Ls_out, mobius_param, Ls_in, lambda_bound);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Approx);
|
||||
NAMESPACE_END(Grid);
|
57
Grid/algorithms/approx/ZMobius.h
Normal file
57
Grid/algorithms/approx/ZMobius.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/ZMobius.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ZMOBIUS_APPROX_H
|
||||
#define GRID_ZMOBIUS_APPROX_H
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
NAMESPACE_BEGIN(Approx);
|
||||
|
||||
//Compute the Zmobius Omega parameters suitable for eigenvalue range -lambda_bound <= lambda <= lambda_bound
|
||||
//Note omega_i = 1/(b_i + c_i) where b_i and c_i are the Mobius parameters
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out,
|
||||
const std::vector<RealD> &omega_in, const int Ls_in,
|
||||
const RealD lambda_bound);
|
||||
|
||||
//mobius_param = b+c with b-c=1
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound);
|
||||
|
||||
//ZMobius class takes gamma_i = (b+c) omega_i as its input, where b, c are factored out
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out,
|
||||
const RealD mobius_param_out, const int Ls_out,
|
||||
const RealD mobius_param_in, const int Ls_in,
|
||||
const RealD lambda_bound);
|
||||
|
||||
//Assumes mobius_param_out == mobius_param_in
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound);
|
||||
|
||||
NAMESPACE_END(Approx);
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -25,6 +25,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef INCLUDED_BIGFLOAT_DOUBLE_H
|
||||
#define INCLUDED_BIGFLOAT_DOUBLE_H
|
||||
|
||||
#include <math.h>
|
||||
|
||||
typedef double mfloat;
|
||||
@ -186,4 +190,6 @@ public:
|
||||
// friend bigfloat& random(void);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
222
Grid/algorithms/iterative/BiCGSTAB.h
Normal file
222
Grid/algorithms/iterative/BiCGSTAB.h
Normal file
@ -0,0 +1,222 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BiCGSTAB.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: juettner <juettner@soton.ac.uk>
|
||||
Author: David Murphy <djmurphy@mit.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_BICGSTAB_H
|
||||
#define GRID_BICGSTAB_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Field>
|
||||
class BiCGSTAB : public OperatorFunction<Field>
|
||||
{
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
|
||||
BiCGSTAB(RealD tol, Integer maxit, bool err_on_no_conv = true) :
|
||||
Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){};
|
||||
|
||||
void operator()(LinearOperatorBase<Field>& Linop, const Field& src, Field& psi)
|
||||
{
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp(0), rho(1), rho_prev(0), alpha(1), beta(0), omega(1);
|
||||
RealD a(0), bo(0), b(0), ssq(0);
|
||||
|
||||
Field p(src);
|
||||
Field r(src);
|
||||
Field rhat(src);
|
||||
Field v(src);
|
||||
Field s(src);
|
||||
Field t(src);
|
||||
Field h(src);
|
||||
|
||||
v = Zero();
|
||||
p = Zero();
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
Linop.Op(psi, v);
|
||||
b = norm2(v);
|
||||
|
||||
r = src - v;
|
||||
rhat = r;
|
||||
a = norm2(r);
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: mp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: r " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if(a <= rsq){ return; }
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: k=0 residual " << a << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch InnerTimer;
|
||||
GridStopWatch AxpyNormTimer;
|
||||
GridStopWatch LinearCombTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++)
|
||||
{
|
||||
rho_prev = rho;
|
||||
|
||||
LinalgTimer.Start();
|
||||
InnerTimer.Start();
|
||||
ComplexD Crho = innerProduct(rhat,r);
|
||||
InnerTimer.Stop();
|
||||
rho = Crho.real();
|
||||
|
||||
beta = (rho / rho_prev) * (alpha / omega);
|
||||
|
||||
LinearCombTimer.Start();
|
||||
bo = beta * omega;
|
||||
auto p_v = p.View();
|
||||
auto r_v = r.View();
|
||||
auto v_v = v.View();
|
||||
accelerator_for(ss, p_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(p_v[ss], beta*p_v(ss) - bo*v_v(ss) + r_v(ss));
|
||||
});
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.Op(p,v);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
InnerTimer.Start();
|
||||
ComplexD Calpha = innerProduct(rhat,v);
|
||||
InnerTimer.Stop();
|
||||
alpha = rho / Calpha.real();
|
||||
|
||||
LinearCombTimer.Start();
|
||||
auto h_v = h.View();
|
||||
auto psi_v = psi.View();
|
||||
accelerator_for(ss, h_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(h_v[ss], alpha*p_v(ss) + psi_v(ss));
|
||||
});
|
||||
|
||||
auto s_v = s.View();
|
||||
accelerator_for(ss, s_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(s_v[ss], -alpha*v_v(ss) + r_v(ss));
|
||||
});
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.Op(s,t);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
InnerTimer.Start();
|
||||
ComplexD Comega = innerProduct(t,s);
|
||||
InnerTimer.Stop();
|
||||
omega = Comega.real() / norm2(t);
|
||||
|
||||
LinearCombTimer.Start();
|
||||
auto t_v = t.View();
|
||||
accelerator_for(ss, psi_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(psi_v[ss], h_v(ss) + omega * s_v(ss));
|
||||
coalescedWrite(r_v[ss], -omega * t_v(ss) + s_v(ss));
|
||||
});
|
||||
LinearCombTimer.Stop();
|
||||
|
||||
cp = norm2(r);
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "BiCGSTAB: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if(cp <= rsq)
|
||||
{
|
||||
SolverTimer.Stop();
|
||||
Linop.Op(psi, v);
|
||||
p = v - src;
|
||||
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD resnorm = sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "BiCGSTAB Converged on iteration " << k << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp/ssq) << std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown " << std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() << std::endl;
|
||||
|
||||
if(ErrorOnNoConverge){ assert(true_residual / Tolerance < 10000.0); }
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "BiCGSTAB did NOT converge" << std::endl;
|
||||
|
||||
if(ErrorOnNoConverge){ assert(0); }
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
158
Grid/algorithms/iterative/BiCGSTABMixedPrec.h
Normal file
158
Grid/algorithms/iterative/BiCGSTABMixedPrec.h
Normal file
@ -0,0 +1,158 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BiCGSTABMixedPrec.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
Author: David Murphy <djmurphy@mit.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_BICGSTAB_MIXED_PREC_H
|
||||
#define GRID_BICGSTAB_MIXED_PREC_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Mixed precision restarted defect correction BiCGSTAB
|
||||
template<class FieldD, class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class MixedPrecisionBiCGSTAB : public LinearFunction<FieldD>
|
||||
{
|
||||
public:
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; // Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid; // Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; // Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
|
||||
MixedPrecisionBiCGSTAB(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid,
|
||||
LinearOperatorBase<FieldF>& _Linop_f, LinearOperatorBase<FieldD>& _Linop_d) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d), Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit),
|
||||
MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), OuterLoopNormMult(100.), guesser(NULL) {};
|
||||
|
||||
void useGuesser(LinearFunction<FieldF>& g){
|
||||
guesser = &g;
|
||||
}
|
||||
|
||||
void operator() (const FieldD& src_d_in, FieldD& sol_d)
|
||||
{
|
||||
TotalInnerIterations = 0;
|
||||
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
int cb = src_d_in.Checkerboard();
|
||||
sol_d.Checkerboard() = cb;
|
||||
|
||||
RealD src_norm = norm2(src_d_in);
|
||||
RealD stop = src_norm * Tolerance*Tolerance;
|
||||
|
||||
GridBase* DoublePrecGrid = src_d_in.Grid();
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
tmp_d.Checkerboard() = cb;
|
||||
|
||||
FieldD tmp2_d(DoublePrecGrid);
|
||||
tmp2_d.Checkerboard() = cb;
|
||||
|
||||
FieldD src_d(DoublePrecGrid);
|
||||
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||
|
||||
RealD inner_tol = InnerTolerance;
|
||||
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
src_f.Checkerboard() = cb;
|
||||
|
||||
FieldF sol_f(SinglePrecGrid);
|
||||
sol_f.Checkerboard() = cb;
|
||||
|
||||
BiCGSTAB<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||
|
||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++)
|
||||
{
|
||||
// Compute double precision rsd and also new RHS vector.
|
||||
Linop_d.Op(sol_d, tmp_d);
|
||||
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Outer iteration " << outer_iter << " residual " << norm << " target " << stop << std::endl;
|
||||
|
||||
if(norm < OuterLoopNormMult * stop){
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Outer iteration converged on iteration " << outer_iter << std::endl;
|
||||
break;
|
||||
}
|
||||
while(norm * inner_tol * inner_tol < stop){ inner_tol *= 2; } // inner_tol = sqrt(stop/norm) ??
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(src_f, src_d);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
sol_f = Zero();
|
||||
|
||||
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||
if(guesser != NULL){ (*guesser)(src_f, sol_f); }
|
||||
|
||||
//Inner CG
|
||||
CG_f.Tolerance = inner_tol;
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f, sol_f);
|
||||
InnerCGtimer.Stop();
|
||||
TotalInnerIterations += CG_f.IterationsToComplete;
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(tmp_d, sol_f);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||
}
|
||||
|
||||
//Final trial CG
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Starting final patch-up double-precision solve" << std::endl;
|
||||
|
||||
BiCGSTAB<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d);
|
||||
TotalFinalStepIterations = CG_d.IterationsToComplete;
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -52,6 +52,7 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer PrintInterval; //GridLogMessages or Iterative
|
||||
RealD TrueResidual;
|
||||
|
||||
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
|
||||
@ -306,7 +307,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
AD = AD-B;
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
|
||||
TrueResidual = std::sqrt(norm2(AD)/norm2(B));
|
||||
std::cout << GridLogMessage <<"\tTrue residual is " << TrueResidual <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
@ -442,7 +444,8 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
|
||||
|
||||
Linop.HermOp(Psi, AP);
|
||||
AP = AP-Src;
|
||||
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
||||
TrueResidual = std::sqrt(norm2(AP)/norm2(Src));
|
||||
std::cout <<GridLogMessage << "\tTrue residual is " << TrueResidual <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
@ -653,7 +656,7 @@ void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
|
||||
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< std::sqrt(rrsum/sssum) << " max "<< std::sqrt(max_resid) <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
@ -668,7 +671,8 @@ void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field
|
||||
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
|
||||
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
|
||||
TrueResidual = std::sqrt(normv(AD)/normv(B));
|
||||
std::cout << GridLogMessage << "\tTrue residual is " << TrueResidual <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
|
@ -49,6 +49,7 @@ public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
RealD TrueResidual;
|
||||
|
||||
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
@ -81,6 +82,14 @@ public:
|
||||
cp = a;
|
||||
ssq = norm2(src);
|
||||
|
||||
// Handle trivial case of zero src
|
||||
if (ssq == 0.){
|
||||
psi = Zero();
|
||||
IterationsToComplete = 1;
|
||||
TrueResidual = 0.;
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl;
|
||||
@ -92,6 +101,7 @@ public:
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if (cp <= rsq) {
|
||||
TrueResidual = std::sqrt(a/ssq);
|
||||
std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
|
||||
IterationsToComplete = 0;
|
||||
return;
|
||||
@ -141,7 +151,7 @@ public:
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||
<< " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
|
||||
<< " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
@ -169,10 +179,17 @@ public:
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
IterationsToComplete = k;
|
||||
TrueResidual = true_residual;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Failed. Calculate true residual before giving up
|
||||
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
TrueResidual = sqrt(norm2(p)/ssq);
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
|
@ -46,15 +46,19 @@ public:
|
||||
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
std::vector<RealD> TrueResidualShift;
|
||||
|
||||
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
|
||||
MaxIterations(maxit),
|
||||
shifts(_shifts)
|
||||
{
|
||||
verbose=1;
|
||||
IterationsToCompleteShift.resize(_shifts.order);
|
||||
TrueResidualShift.resize(_shifts.order);
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
|
||||
@ -125,6 +129,17 @@ public:
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src);
|
||||
|
||||
// Handle trivial case of zero src.
|
||||
if( cp == 0. ){
|
||||
for(int s=0;s<nshift;s++){
|
||||
psi[s] = Zero();
|
||||
IterationsToCompleteShift[s] = 1;
|
||||
TrueResidualShift[s] = 0.;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
|
||||
@ -270,6 +285,7 @@ public:
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
IterationsToCompleteShift[s] = k;
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
@ -299,7 +315,8 @@ public:
|
||||
axpy(r,-alpha[s],src,tmp);
|
||||
RealD rn = norm2(r);
|
||||
RealD cn = norm2(src);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
TrueResidualShift[s] = std::sqrt(rn/cn);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<< TrueResidualShift[s] <<std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
|
@ -37,211 +37,6 @@ Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Move following 100 LOC to lattice/Lattice_basis.h
|
||||
////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||
{
|
||||
// If assume basis[j] are already orthonormal,
|
||||
// can take all inner products in parallel saving 2x bandwidth
|
||||
// Save 3x bandwidth on the second line of loop.
|
||||
// perhaps 2.5x speed up.
|
||||
// 2x overall in Multigrid Lanczos
|
||||
for(int j=0; j<k; ++j){
|
||||
auto ip = innerProduct(basis[j],w);
|
||||
w = w - ip*basis[j];
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0].View()) View;
|
||||
auto tmp_v = basis[0].View();
|
||||
Vector<View> basis_v(basis.size(),tmp_v);
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
for(int k=0;k<basis.size();k++){
|
||||
basis_v[k] = basis[k].View();
|
||||
}
|
||||
#if 0
|
||||
std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private
|
||||
thread_region
|
||||
{
|
||||
vobj* B = Bt.data() + Nm * thread_num();
|
||||
|
||||
thread_for_in_region(ss, grid->oSites(),{
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
for(int j=j0; j<j1; ++j){
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B[j] +=Qt(j,k) * basis_v[k][ss];
|
||||
}
|
||||
}
|
||||
for(int j=j0; j<j1; ++j){
|
||||
basis_v[j][ss] = B[j];
|
||||
}
|
||||
});
|
||||
}
|
||||
#else
|
||||
|
||||
int nrot = j1-j0;
|
||||
|
||||
|
||||
uint64_t oSites =grid->oSites();
|
||||
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
|
||||
|
||||
// printf("BasisRotate %d %d nrot %d siteBlock %d\n",j0,j1,nrot,siteBlock);
|
||||
|
||||
Vector <vobj> Bt(siteBlock * nrot);
|
||||
auto Bp=&Bt[0];
|
||||
|
||||
// GPU readable copy of Eigen matrix
|
||||
Vector<double> Qt_jv(Nm*Nm);
|
||||
double *Qt_p = & Qt_jv[0];
|
||||
for(int k=0;k<Nm;++k){
|
||||
for(int j=0;j<Nm;++j){
|
||||
Qt_p[j*Nm+k]=Qt(j,k);
|
||||
}
|
||||
}
|
||||
|
||||
// Block the loop to keep storage footprint down
|
||||
vobj zz=Zero();
|
||||
for(uint64_t s=0;s<oSites;s+=siteBlock){
|
||||
|
||||
// remaining work in this block
|
||||
int ssites=MIN(siteBlock,oSites-s);
|
||||
|
||||
// zero out the accumulators
|
||||
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
|
||||
auto z=coalescedRead(zz);
|
||||
coalescedWrite(Bp[ss],z);
|
||||
});
|
||||
|
||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||
|
||||
int j =sj%nrot;
|
||||
int jj =j0+j;
|
||||
int ss =sj/nrot;
|
||||
int sss=ss+s;
|
||||
|
||||
for(int k=k0; k<k1; ++k){
|
||||
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
||||
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
|
||||
}
|
||||
});
|
||||
|
||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||
int j =sj%nrot;
|
||||
int jj =j0+j;
|
||||
int ss =sj/nrot;
|
||||
int sss=ss+s;
|
||||
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
||||
});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Extract a single rotated vector
|
||||
template<class Field>
|
||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0].View()) View;
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
result.Checkerboard() = basis[0].Checkerboard();
|
||||
auto result_v=result.View();
|
||||
Vector<View> basis_v(basis.size(),result_v);
|
||||
for(int k=0;k<basis.size();k++){
|
||||
basis_v[k] = basis[k].View();
|
||||
}
|
||||
vobj zz=Zero();
|
||||
Vector<double> Qt_jv(Nm);
|
||||
double * Qt_j = & Qt_jv[0];
|
||||
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
|
||||
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
||||
auto B=coalescedRead(zz);
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
|
||||
}
|
||||
coalescedWrite(result_v[ss], B);
|
||||
});
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
||||
{
|
||||
int vlen = idx.size();
|
||||
|
||||
assert(vlen>=1);
|
||||
assert(vlen<=sort_vals.size());
|
||||
assert(vlen<=_v.size());
|
||||
|
||||
for (size_t i=0;i<vlen;i++) {
|
||||
|
||||
if (idx[i] != i) {
|
||||
|
||||
//////////////////////////////////////
|
||||
// idx[i] is a table of desired sources giving a permutation.
|
||||
// Swap v[i] with v[idx[i]].
|
||||
// Find j>i for which _vnew[j] = _vold[i],
|
||||
// track the move idx[j] => idx[i]
|
||||
// track the move idx[i] => i
|
||||
//////////////////////////////////////
|
||||
size_t j;
|
||||
for (j=i;j<idx.size();j++)
|
||||
if (idx[j]==i)
|
||||
break;
|
||||
|
||||
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
||||
|
||||
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
|
||||
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
||||
|
||||
idx[j] = idx[i];
|
||||
idx[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
||||
{
|
||||
std::vector<int> idx(sort_vals.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
||||
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
||||
{
|
||||
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
||||
if (reverse)
|
||||
std::reverse(idx.begin(), idx.end());
|
||||
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
// PAB: faster to compute the inner products first then fuse loops.
|
||||
// If performance critical can improve.
|
||||
template<class Field>
|
||||
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
||||
result = Zero();
|
||||
assert(_v.size()==eval.size());
|
||||
int N = (int)_v.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
Field& tmp = _v[i];
|
||||
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Implicitly restarted lanczos
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
@ -405,6 +405,70 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field> class NonHermitianSchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field>
|
||||
{
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
NonHermitianSchurRedBlackDiagMooeeSolve(OperatorFunction<Field>& RBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false)
|
||||
: SchurRedBlackBase<Field>(RBSolver, initSubGuess, _solnAsInitGuess) {};
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix& _Matrix, const Field& src, Field& src_e, Field& src_o)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even, src_e, src);
|
||||
pickCheckerboard(Odd , src_o, src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e, tmp); assert( tmp.Checkerboard() == Even );
|
||||
_Matrix.Meooe (tmp, Mtmp); assert( Mtmp.Checkerboard() == Odd );
|
||||
src_o -= Mtmp; assert( src_o.Checkerboard() == Odd );
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix& _Matrix, const Field& sol_o, const Field& src_e, Field& sol)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e_i(grid);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o, tmp); assert( tmp.Checkerboard() == Even );
|
||||
src_e_i = src_e - tmp; assert( src_e_i.Checkerboard() == Even );
|
||||
_Matrix.MooeeInv(src_e_i, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
|
||||
setCheckerboard(sol, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
setCheckerboard(sol, sol_o); assert( sol_o.Checkerboard() == Odd );
|
||||
}
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const Field& src_o, Field& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagMooeeOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o); assert(sol_o.Checkerboard() == Odd);
|
||||
}
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const std::vector<Field>& src_o, std::vector<Field>& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagMooeeOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
||||
@ -482,5 +546,76 @@ namespace Grid {
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field> class NonHermitianSchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field>
|
||||
{
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
NonHermitianSchurRedBlackDiagTwoSolve(OperatorFunction<Field>& RBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false)
|
||||
: SchurRedBlackBase<Field>(RBSolver, initSubGuess, _solnAsInitGuess) {};
|
||||
|
||||
virtual void RedBlackSource(Matrix& _Matrix, const Field& src, Field& src_e, Field& src_o)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even, src_e, src);
|
||||
pickCheckerboard(Odd , src_o, src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e, tmp); assert( tmp.Checkerboard() == Even );
|
||||
_Matrix.Meooe (tmp, Mtmp); assert( Mtmp.Checkerboard() == Odd );
|
||||
src_o -= Mtmp; assert( src_o.Checkerboard() == Odd );
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix& _Matrix, const Field& sol_o, const Field& src_e, Field& sol)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field sol_o_i(grid);
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// MooeeInv due to pecond
|
||||
////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(sol_o, tmp);
|
||||
sol_o_i = tmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o_i, tmp); assert( tmp.Checkerboard() == Even );
|
||||
tmp = src_e - tmp; assert( src_e.Checkerboard() == Even );
|
||||
_Matrix.MooeeInv(tmp, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
|
||||
setCheckerboard(sol, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
setCheckerboard(sol, sol_o_i); assert( sol_o_i.Checkerboard() == Odd );
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const Field& src_o, Field& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagTwoOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o);
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const std::vector<Field>& src_o, std::vector<Field>& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagTwoOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -6,21 +6,39 @@ NAMESPACE_BEGIN(Grid);
|
||||
MemoryStats *MemoryProfiler::stats = nullptr;
|
||||
bool MemoryProfiler::debug = false;
|
||||
|
||||
#ifdef GRID_NVCC
|
||||
#define SMALL_LIMIT (0)
|
||||
#else
|
||||
#define SMALL_LIMIT (4096)
|
||||
int PointerCache::NcacheSmall = PointerCache::NcacheSmallMax;
|
||||
#ifdef GRID_CUDA
|
||||
int PointerCache::Ncache = 32;
|
||||
#else
|
||||
int PointerCache::Ncache = 8;
|
||||
#endif
|
||||
int PointerCache::Victim;
|
||||
int PointerCache::VictimSmall;
|
||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::NcacheMax];
|
||||
PointerCache::PointerCacheEntry PointerCache::EntriesSmall[PointerCache::NcacheSmallMax];
|
||||
|
||||
#ifdef POINTER_CACHE
|
||||
int PointerCache::victim;
|
||||
void PointerCache::Init(void)
|
||||
{
|
||||
char * str;
|
||||
|
||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
||||
str= getenv("GRID_ALLOC_NCACHE_LARGE");
|
||||
if ( str ) Ncache = atoi(str);
|
||||
if ( (Ncache<0) || (Ncache > NcacheMax)) Ncache = NcacheMax;
|
||||
|
||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||
|
||||
if (bytes < SMALL_LIMIT ) return ptr;
|
||||
str= getenv("GRID_ALLOC_NCACHE_SMALL");
|
||||
if ( str ) NcacheSmall = atoi(str);
|
||||
if ( (NcacheSmall<0) || (NcacheSmall > NcacheSmallMax)) NcacheSmall = NcacheSmallMax;
|
||||
|
||||
// printf("Aligned alloocator cache: large %d/%d small %d/%d\n",Ncache,NcacheMax,NcacheSmall,NcacheSmallMax);
|
||||
}
|
||||
void *PointerCache::Insert(void *ptr,size_t bytes)
|
||||
{
|
||||
if (bytes < GRID_ALLOC_SMALL_LIMIT )
|
||||
return Insert(ptr,bytes,EntriesSmall,NcacheSmall,VictimSmall);
|
||||
return Insert(ptr,bytes,Entries,Ncache,Victim);
|
||||
}
|
||||
void *PointerCache::Insert(void *ptr,size_t bytes,PointerCacheEntry *entries,int ncache,int &victim)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
@ -28,8 +46,8 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||
void * ret = NULL;
|
||||
int v = -1;
|
||||
|
||||
for(int e=0;e<Ncache;e++) {
|
||||
if ( Entries[e].valid==0 ) {
|
||||
for(int e=0;e<ncache;e++) {
|
||||
if ( entries[e].valid==0 ) {
|
||||
v=e;
|
||||
break;
|
||||
}
|
||||
@ -37,40 +55,43 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||
|
||||
if ( v==-1 ) {
|
||||
v=victim;
|
||||
victim = (victim+1)%Ncache;
|
||||
victim = (victim+1)%ncache;
|
||||
}
|
||||
|
||||
if ( Entries[v].valid ) {
|
||||
ret = Entries[v].address;
|
||||
Entries[v].valid = 0;
|
||||
Entries[v].address = NULL;
|
||||
Entries[v].bytes = 0;
|
||||
if ( entries[v].valid ) {
|
||||
ret = entries[v].address;
|
||||
entries[v].valid = 0;
|
||||
entries[v].address = NULL;
|
||||
entries[v].bytes = 0;
|
||||
}
|
||||
|
||||
Entries[v].address=ptr;
|
||||
Entries[v].bytes =bytes;
|
||||
Entries[v].valid =1;
|
||||
entries[v].address=ptr;
|
||||
entries[v].bytes =bytes;
|
||||
entries[v].valid =1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *PointerCache::Lookup(size_t bytes) {
|
||||
|
||||
if (bytes < SMALL_LIMIT ) return NULL;
|
||||
|
||||
void *PointerCache::Lookup(size_t bytes)
|
||||
{
|
||||
if (bytes < GRID_ALLOC_SMALL_LIMIT )
|
||||
return Lookup(bytes,EntriesSmall,NcacheSmall);
|
||||
return Lookup(bytes,Entries,Ncache);
|
||||
}
|
||||
void *PointerCache::Lookup(size_t bytes,PointerCacheEntry *entries,int ncache)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
for(int e=0;e<Ncache;e++){
|
||||
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
|
||||
Entries[e].valid = 0;
|
||||
return Entries[e].address;
|
||||
for(int e=0;e<ncache;e++){
|
||||
if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
|
||||
entries[e].valid = 0;
|
||||
return entries[e].address;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES)
|
||||
{
|
||||
|
@ -42,21 +42,21 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
#define POINTER_CACHE
|
||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||
#define GRID_ALLOC_SMALL_LIMIT (4096)
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Move control to configure.ac and Config.h?
|
||||
#ifdef POINTER_CACHE
|
||||
|
||||
class PointerCache {
|
||||
private:
|
||||
/*Pinning pages is costly*/
|
||||
/*Could maintain separate large and small allocation caches*/
|
||||
#ifdef GRID_NVCC
|
||||
static const int Ncache=128;
|
||||
#else
|
||||
static const int Ncache=8;
|
||||
#endif
|
||||
static int victim;
|
||||
/* Could make these configurable, perhaps up to a max size*/
|
||||
static const int NcacheSmallMax=128;
|
||||
static const int NcacheMax=16;
|
||||
static int NcacheSmall;
|
||||
static int Ncache;
|
||||
|
||||
typedef struct {
|
||||
void *address;
|
||||
@ -64,15 +64,18 @@ private:
|
||||
int valid;
|
||||
} PointerCacheEntry;
|
||||
|
||||
static PointerCacheEntry Entries[Ncache];
|
||||
static PointerCacheEntry Entries[NcacheMax];
|
||||
static int Victim;
|
||||
static PointerCacheEntry EntriesSmall[NcacheSmallMax];
|
||||
static int VictimSmall;
|
||||
|
||||
public:
|
||||
|
||||
static void Init(void);
|
||||
static void *Insert(void *ptr,size_t bytes) ;
|
||||
static void *Insert(void *ptr,size_t bytes,PointerCacheEntry *entries,int ncache,int &victim) ;
|
||||
static void *Lookup(size_t bytes) ;
|
||||
|
||||
static void *Lookup(size_t bytes,PointerCacheEntry *entries,int ncache) ;
|
||||
};
|
||||
#endif
|
||||
|
||||
std::string sizeString(size_t bytes);
|
||||
|
||||
@ -89,6 +92,13 @@ public:
|
||||
static bool debug;
|
||||
};
|
||||
|
||||
#ifdef GRID_NVCC
|
||||
#define profilerCudaMeminfo \
|
||||
{ size_t f, t ; cudaMemGetInfo ( &f,&t); std::cout << GridLogDebug << "[Memory debug] Cuda free "<<f<<"/"<<t << std::endl;}
|
||||
#else
|
||||
#define profilerCudaMeminfo
|
||||
#endif
|
||||
|
||||
#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")"
|
||||
#define profilerDebugPrint \
|
||||
if (MemoryProfiler::stats) \
|
||||
@ -103,7 +113,8 @@ public:
|
||||
<< std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \
|
||||
<< std::endl; \
|
||||
}
|
||||
} \
|
||||
profilerCudaMeminfo;
|
||||
|
||||
#define profilerAllocate(bytes) \
|
||||
if (MemoryProfiler::stats) \
|
||||
|
@ -114,6 +114,7 @@ public:
|
||||
void GlobalSumVector(RealD *,int N);
|
||||
void GlobalSum(uint32_t &);
|
||||
void GlobalSum(uint64_t &);
|
||||
void GlobalSumVector(uint64_t*,int N);
|
||||
void GlobalSum(ComplexF &c);
|
||||
void GlobalSumVector(ComplexF *c,int N);
|
||||
void GlobalSum(ComplexD &c);
|
||||
|
@ -255,6 +255,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalXOR(uint32_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
|
||||
assert(ierr==0);
|
||||
|
@ -70,9 +70,10 @@ CartesianCommunicator::~CartesianCommunicator(){}
|
||||
void CartesianCommunicator::GlobalSum(float &){}
|
||||
void CartesianCommunicator::GlobalSumVector(float *,int N){}
|
||||
void CartesianCommunicator::GlobalSum(double &){}
|
||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
||||
void CartesianCommunicator::GlobalSum(uint64_t &){}
|
||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
||||
void CartesianCommunicator::GlobalSumVector(uint64_t *,int N){}
|
||||
void CartesianCommunicator::GlobalXOR(uint32_t &){}
|
||||
void CartesianCommunicator::GlobalXOR(uint64_t &){}
|
||||
|
||||
|
@ -74,7 +74,9 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
||||
if (heap_bytes >= heap_size) {
|
||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
||||
std::cout<< " Current alloc is " << (bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||
std::cout<< " Current bytes is " << (heap_bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||
std::cout<< " Current heap is " << (heap_size/(1024*1024)) <<"MB"<<std::endl;
|
||||
assert(heap_bytes<heap_size);
|
||||
}
|
||||
//std::cerr << "ShmBufferMalloc "<<std::hex<< ptr<<" - "<<((uint64_t)ptr+bytes)<<std::dec<<std::endl;
|
||||
|
@ -49,4 +49,29 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<typename Op, typename T1>
|
||||
auto Cshift(const LatticeUnaryExpression<Op,T1> &expr,int dim,int shift)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
{
|
||||
return Cshift(closure(expr),dim,shift);
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto Cshift(const LatticeBinaryExpression<Op,T1,T2> &expr,int dim,int shift)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
|
||||
{
|
||||
return Cshift(closure(expr),dim,shift);
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto Cshift(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr,int dim,int shift)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
||||
eval(0, expr.arg2),
|
||||
eval(0, expr.arg3)))>
|
||||
{
|
||||
return Cshift(closure(expr),dim,shift);
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
||||
|
@ -35,7 +35,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/lattice/Lattice_local.h>
|
||||
#include <Grid/lattice/Lattice_reduction.h>
|
||||
#include <Grid/lattice/Lattice_peekpoke.h>
|
||||
#include <Grid/lattice/Lattice_reality.h>
|
||||
//#include <Grid/lattice/Lattice_reality.h>
|
||||
#include <Grid/lattice/Lattice_comparison_utils.h>
|
||||
#include <Grid/lattice/Lattice_comparison.h>
|
||||
#include <Grid/lattice/Lattice_coordinate.h>
|
||||
@ -43,4 +43,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/lattice/Lattice_rng.h>
|
||||
#include <Grid/lattice/Lattice_unary.h>
|
||||
#include <Grid/lattice/Lattice_transfer.h>
|
||||
|
||||
#include <Grid/lattice/Lattice_basis.h>
|
||||
|
@ -9,6 +9,7 @@ Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: Christoph Lehner <christoph@lhnr.de
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -94,7 +95,7 @@ const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg)
|
||||
template <class lobj> accelerator_inline
|
||||
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
|
||||
{
|
||||
auto view = arg.View();
|
||||
auto view = arg.AcceleratorView(ViewRead);
|
||||
return view[ss];
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -36,9 +37,9 @@ NAMESPACE_BEGIN(Grid);
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
@ -55,9 +56,9 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -72,9 +73,9 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -88,9 +89,9 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -107,8 +108,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
mult(&tmp,&lhs_v(ss),&rhs);
|
||||
@ -120,8 +121,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -134,8 +135,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -147,8 +148,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
@ -164,8 +165,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -178,8 +179,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -192,8 +193,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -205,8 +206,8 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto rhs_v = lhs.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
@ -220,9 +221,9 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto x_v = x.AcceleratorView(ViewRead);
|
||||
auto y_v = y.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||
auto tmp = a*x_v(ss)+y_v(ss);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
@ -233,9 +234,9 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
auto x_v = x.AcceleratorView(ViewRead);
|
||||
auto y_v = y.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||
auto tmp = a*x_v(ss)+b*y_v(ss);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
|
@ -9,6 +9,7 @@ Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -49,6 +50,26 @@ void accelerator_inline conformable(GridBase *lhs,GridBase *rhs)
|
||||
assert(lhs == rhs);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Advise the LatticeAccelerator class
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
enum LatticeAcceleratorAdvise {
|
||||
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
|
||||
// significantly influence performance of bulk storage.
|
||||
AdviseReadMostly = 0x2, // Data will mostly be read. On some architectures
|
||||
// enables read-only copies of memory to be kept on
|
||||
// host and device.
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// View Access Mode
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
enum ViewMode {
|
||||
ViewRead = 0x1,
|
||||
ViewWrite = 0x2,
|
||||
ViewReadWrite = 0x3
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Minimal base class containing only data valid to access from accelerator
|
||||
// _odata will be a managed pointer in CUDA
|
||||
@ -75,6 +96,37 @@ public:
|
||||
if (grid) conformable(grid, _grid);
|
||||
else grid = _grid;
|
||||
};
|
||||
|
||||
accelerator_inline void Advise(int advise) {
|
||||
#ifdef GRID_NVCC
|
||||
#ifndef __CUDA_ARCH__ // only on host
|
||||
if (advise & AdviseInfrequentUse) {
|
||||
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId);
|
||||
}
|
||||
if (advise & AdviseReadMostly) {
|
||||
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
|
||||
#ifdef GRID_NVCC
|
||||
#ifndef __CUDA_ARCH__ // only on host
|
||||
int target;
|
||||
cudaGetDevice(&target);
|
||||
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),target);
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
|
||||
#ifdef GRID_NVCC
|
||||
#ifndef __CUDA_ARCH__ // only on host
|
||||
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -206,9 +258,23 @@ public:
|
||||
// The view is trivially copy constructible and may be copied to an accelerator device
|
||||
// in device lambdas
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
LatticeView<vobj> View (void) const
|
||||
LatticeView<vobj> View (void) const // deprecated, should pick AcceleratorView for accelerator_for
|
||||
{ // and HostView for thread_for
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||
return accessor;
|
||||
}
|
||||
|
||||
LatticeView<vobj> AcceleratorView(int mode = ViewReadWrite) const
|
||||
{
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||
accessor.AcceleratorPrefetch(mode);
|
||||
return accessor;
|
||||
}
|
||||
|
||||
LatticeView<vobj> HostView(int mode = ViewReadWrite) const
|
||||
{
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||
accessor.HostPrefetch(mode);
|
||||
return accessor;
|
||||
}
|
||||
|
||||
@ -232,7 +298,7 @@ public:
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto me = View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
vstream(me[ss],tmp);
|
||||
@ -251,7 +317,7 @@ public:
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto me = View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
vstream(me[ss],tmp);
|
||||
@ -269,7 +335,7 @@ public:
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
auto me = View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
vstream(me[ss],tmp);
|
||||
@ -357,7 +423,6 @@ public:
|
||||
// copy constructor
|
||||
///////////////////////////////////////////
|
||||
Lattice(const Lattice& r){
|
||||
// std::cout << "Lattice constructor(const Lattice &) "<<this<<std::endl;
|
||||
this->_grid = r.Grid();
|
||||
resize(this->_grid->oSites());
|
||||
*this = r;
|
||||
@ -380,8 +445,8 @@ public:
|
||||
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
||||
conformable(*this,r);
|
||||
this->checkerboard = r.Checkerboard();
|
||||
auto me = View();
|
||||
auto him= r.View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto him= r.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
@ -394,8 +459,8 @@ public:
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
this->checkerboard = r.Checkerboard();
|
||||
conformable(*this,r);
|
||||
auto me = View();
|
||||
auto him= r.View();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto him= r.AcceleratorView(ViewRead);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
|
236
Grid/lattice/Lattice_basis.h
Normal file
236
Grid/lattice/Lattice_basis.h
Normal file
@ -0,0 +1,236 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_basis.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Field>
|
||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||
{
|
||||
// If assume basis[j] are already orthonormal,
|
||||
// can take all inner products in parallel saving 2x bandwidth
|
||||
// Save 3x bandwidth on the second line of loop.
|
||||
// perhaps 2.5x speed up.
|
||||
// 2x overall in Multigrid Lanczos
|
||||
for(int j=0; j<k; ++j){
|
||||
auto ip = innerProduct(basis[j],w);
|
||||
w = w - ip*basis[j];
|
||||
}
|
||||
}
|
||||
|
||||
template<class VField, class Matrix>
|
||||
void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0]) Field;
|
||||
typedef decltype(basis[0].View()) View;
|
||||
auto tmp_v = basis[0].AcceleratorView(ViewReadWrite);
|
||||
Vector<View> basis_v(basis.size(),tmp_v);
|
||||
typedef typename std::remove_reference<decltype(tmp_v[0])>::type vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
for(int k=0;k<basis.size();k++){
|
||||
basis_v[k] = basis[k].AcceleratorView(ViewReadWrite);
|
||||
}
|
||||
|
||||
#ifndef GRID_NVCC
|
||||
thread_region
|
||||
{
|
||||
std::vector < vobj > B(Nm); // Thread private
|
||||
thread_for_in_region(ss, grid->oSites(),{
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
for(int j=j0; j<j1; ++j){
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B[j] +=Qt(j,k) * basis_v[k][ss];
|
||||
}
|
||||
}
|
||||
for(int j=j0; j<j1; ++j){
|
||||
basis_v[j][ss] = B[j];
|
||||
}
|
||||
});
|
||||
}
|
||||
#else
|
||||
int nrot = j1-j0;
|
||||
if (!nrot) // edge case not handled gracefully by Cuda
|
||||
return;
|
||||
|
||||
uint64_t oSites =grid->oSites();
|
||||
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
|
||||
|
||||
Vector <vobj> Bt(siteBlock * nrot);
|
||||
auto Bp=&Bt[0];
|
||||
|
||||
// GPU readable copy of matrix
|
||||
Vector<double> Qt_jv(Nm*Nm);
|
||||
double *Qt_p = & Qt_jv[0];
|
||||
thread_for(i,Nm*Nm,{
|
||||
int j = i/Nm;
|
||||
int k = i%Nm;
|
||||
Qt_p[i]=Qt(j,k);
|
||||
});
|
||||
|
||||
// Block the loop to keep storage footprint down
|
||||
for(uint64_t s=0;s<oSites;s+=siteBlock){
|
||||
|
||||
// remaining work in this block
|
||||
int ssites=MIN(siteBlock,oSites-s);
|
||||
|
||||
// zero out the accumulators
|
||||
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
|
||||
decltype(coalescedRead(Bp[ss])) z;
|
||||
z=Zero();
|
||||
coalescedWrite(Bp[ss],z);
|
||||
});
|
||||
|
||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||
|
||||
int j =sj%nrot;
|
||||
int jj =j0+j;
|
||||
int ss =sj/nrot;
|
||||
int sss=ss+s;
|
||||
|
||||
for(int k=k0; k<k1; ++k){
|
||||
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
||||
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
|
||||
}
|
||||
});
|
||||
|
||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||
int j =sj%nrot;
|
||||
int jj =j0+j;
|
||||
int ss =sj/nrot;
|
||||
int sss=ss+s;
|
||||
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
||||
});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Extract a single rotated vector
|
||||
template<class Field>
|
||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0].AcceleratorView()) View;
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
result.Checkerboard() = basis[0].Checkerboard();
|
||||
auto result_v=result.AcceleratorView(ViewWrite);
|
||||
Vector<View> basis_v(basis.size(),result_v);
|
||||
for(int k=0;k<basis.size();k++){
|
||||
basis_v[k] = basis[k].AcceleratorView(ViewRead);
|
||||
}
|
||||
vobj zz=Zero();
|
||||
Vector<double> Qt_jv(Nm);
|
||||
double * Qt_j = & Qt_jv[0];
|
||||
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
|
||||
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
||||
auto B=coalescedRead(zz);
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
|
||||
}
|
||||
coalescedWrite(result_v[ss], B);
|
||||
});
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
||||
{
|
||||
int vlen = idx.size();
|
||||
|
||||
assert(vlen>=1);
|
||||
assert(vlen<=sort_vals.size());
|
||||
assert(vlen<=_v.size());
|
||||
|
||||
for (size_t i=0;i<vlen;i++) {
|
||||
|
||||
if (idx[i] != i) {
|
||||
|
||||
//////////////////////////////////////
|
||||
// idx[i] is a table of desired sources giving a permutation.
|
||||
// Swap v[i] with v[idx[i]].
|
||||
// Find j>i for which _vnew[j] = _vold[i],
|
||||
// track the move idx[j] => idx[i]
|
||||
// track the move idx[i] => i
|
||||
//////////////////////////////////////
|
||||
size_t j;
|
||||
for (j=i;j<idx.size();j++)
|
||||
if (idx[j]==i)
|
||||
break;
|
||||
|
||||
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
||||
|
||||
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
|
||||
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
||||
|
||||
idx[j] = idx[i];
|
||||
idx[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
||||
{
|
||||
std::vector<int> idx(sort_vals.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
||||
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
||||
{
|
||||
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
||||
if (reverse)
|
||||
std::reverse(idx.begin(), idx.end());
|
||||
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
// PAB: faster to compute the inner products first then fuse loops.
|
||||
// If performance critical can improve.
|
||||
template<class Field>
|
||||
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
||||
result = Zero();
|
||||
assert(_v.size()==eval.size());
|
||||
int N = (int)_v.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
Field& tmp = _v[i];
|
||||
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -156,7 +156,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
accelerator_inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
|
||||
inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
|
||||
|
||||
GridBase *grid = l.Grid();
|
||||
|
||||
@ -185,7 +185,7 @@ accelerator_inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate
|
||||
};
|
||||
|
||||
template<class vobj,class sobj>
|
||||
accelerator_inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
|
||||
inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
|
||||
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
|
@ -40,6 +40,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||
@ -50,6 +51,7 @@ template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
|
||||
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||
|
@ -5,6 +5,7 @@
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
@ -93,7 +94,7 @@ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
|
||||
// Double inner product
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
@ -102,8 +103,8 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
||||
GridBase *grid = left.Grid();
|
||||
|
||||
// Might make all code paths go this way.
|
||||
auto left_v = left.View();
|
||||
auto right_v=right.View();
|
||||
auto left_v = left.AcceleratorView(ViewRead);
|
||||
auto right_v=right.AcceleratorView(ViewRead);
|
||||
|
||||
const uint64_t nsimd = grid->Nsimd();
|
||||
const uint64_t sites = grid->oSites();
|
||||
@ -137,11 +138,18 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
||||
})
|
||||
nrm = TensorRemove(sum(inner_tmp_v,sites));
|
||||
#endif
|
||||
grid->GlobalSum(nrm);
|
||||
|
||||
return nrm;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) {
|
||||
GridBase *grid = left.Grid();
|
||||
ComplexD nrm = rankInnerProduct(left,right);
|
||||
grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
@ -167,9 +175,9 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
||||
|
||||
GridBase *grid = x.Grid();
|
||||
|
||||
auto x_v=x.View();
|
||||
auto y_v=y.View();
|
||||
auto z_v=z.View();
|
||||
auto x_v=x.AcceleratorView(ViewRead);
|
||||
auto y_v=y.AcceleratorView(ViewRead);
|
||||
auto z_v=z.AcceleratorView(ViewWrite);
|
||||
|
||||
const uint64_t nsimd = grid->Nsimd();
|
||||
const uint64_t sites = grid->oSites();
|
||||
@ -204,8 +212,64 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
||||
grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
template<class vobj> strong_inline void
|
||||
innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
conformable(left,right);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
Vector<ComplexD> tmp(2);
|
||||
|
||||
GridBase *grid = left.Grid();
|
||||
|
||||
auto left_v=left.AcceleratorView(ViewRead);
|
||||
auto right_v=right.AcceleratorView(ViewRead);
|
||||
|
||||
const uint64_t nsimd = grid->Nsimd();
|
||||
const uint64_t sites = grid->oSites();
|
||||
|
||||
#ifdef GRID_NVCC
|
||||
// GPU
|
||||
typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t;
|
||||
typedef decltype(innerProduct(left_v[0],left_v[0])) norm_t;
|
||||
Vector<inner_t> inner_tmp(sites);
|
||||
Vector<norm_t> norm_tmp(sites);
|
||||
auto inner_tmp_v = &inner_tmp[0];
|
||||
auto norm_tmp_v = &norm_tmp[0];
|
||||
|
||||
accelerator_for( ss, sites, nsimd,{
|
||||
auto left_tmp = left_v(ss);
|
||||
coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss)));
|
||||
coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp));
|
||||
});
|
||||
|
||||
tmp[0] = TensorRemove(sumD_gpu(inner_tmp_v,sites));
|
||||
tmp[1] = TensorRemove(sumD_gpu(norm_tmp_v,sites));
|
||||
#else
|
||||
// CPU
|
||||
typedef decltype(innerProductD(left_v[0],right_v[0])) inner_t;
|
||||
typedef decltype(innerProductD(left_v[0],left_v[0])) norm_t;
|
||||
Vector<inner_t> inner_tmp(sites);
|
||||
Vector<norm_t> norm_tmp(sites);
|
||||
auto inner_tmp_v = &inner_tmp[0];
|
||||
auto norm_tmp_v = &norm_tmp[0];
|
||||
|
||||
accelerator_for( ss, sites, nsimd,{
|
||||
auto left_tmp = left_v(ss);
|
||||
inner_tmp_v[ss] = innerProductD(left_tmp,right_v(ss));
|
||||
norm_tmp_v[ss] = innerProductD(left_tmp,left_tmp);
|
||||
});
|
||||
// Already promoted to double
|
||||
tmp[0] = TensorRemove(sum(inner_tmp_v,sites));
|
||||
tmp[1] = TensorRemove(sum(norm_tmp_v,sites));
|
||||
#endif
|
||||
grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector
|
||||
ip = tmp[0];
|
||||
nrm = real(tmp[1]);
|
||||
}
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object
|
||||
|
@ -37,6 +37,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
template<class vobj>
|
||||
inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
|
||||
{
|
||||
@ -48,6 +49,7 @@ inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace Index level dependent operation
|
||||
|
@ -6,6 +6,7 @@
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -63,6 +64,7 @@ template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,con
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
|
||||
int cb = half.Checkerboard();
|
||||
auto half_v = half.View();
|
||||
@ -81,25 +83,130 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex,int nbasis>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flexible Type Conversion for internal promotion to double as well as graceful
|
||||
// treatment of scalar-compatible types
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
accelerator_inline void convertType(ComplexD & out, const std::complex<double> & in) {
|
||||
out = in;
|
||||
}
|
||||
|
||||
accelerator_inline void convertType(ComplexF & out, const std::complex<float> & in) {
|
||||
out = in;
|
||||
}
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
accelerator_inline void convertType(vComplexF & out, const ComplexF & in) {
|
||||
((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in;
|
||||
}
|
||||
accelerator_inline void convertType(vComplexD & out, const ComplexD & in) {
|
||||
((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd())] = in;
|
||||
}
|
||||
accelerator_inline void convertType(vComplexD2 & out, const ComplexD & in) {
|
||||
((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd()*2)] = in;
|
||||
}
|
||||
#endif
|
||||
|
||||
accelerator_inline void convertType(vComplexF & out, const vComplexD2 & in) {
|
||||
out.v = Optimization::PrecisionChange::DtoS(in._internal[0].v,in._internal[1].v);
|
||||
}
|
||||
|
||||
accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) {
|
||||
Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v);
|
||||
}
|
||||
|
||||
template<typename T1,typename T2,int N>
|
||||
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in);
|
||||
template<typename T1,typename T2,int N>
|
||||
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in);
|
||||
|
||||
template<typename T1,typename T2, typename std::enable_if<!isGridScalar<T1>::value, T1>::type* = nullptr>
|
||||
accelerator_inline void convertType(T1 & out, const iScalar<T2> & in) {
|
||||
convertType(out,in._internal);
|
||||
}
|
||||
|
||||
template<typename T1,typename T2>
|
||||
accelerator_inline void convertType(iScalar<T1> & out, const T2 & in) {
|
||||
convertType(out._internal,in);
|
||||
}
|
||||
|
||||
template<typename T1,typename T2,int N>
|
||||
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in) {
|
||||
for (int i=0;i<N;i++)
|
||||
for (int j=0;j<N;j++)
|
||||
convertType(out._internal[i][j],in._internal[i][j]);
|
||||
}
|
||||
|
||||
template<typename T1,typename T2,int N>
|
||||
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in) {
|
||||
for (int i=0;i<N;i++)
|
||||
convertType(out._internal[i],in._internal[i]);
|
||||
}
|
||||
|
||||
template<typename T, typename std::enable_if<isGridFundamental<T>::value, T>::type* = nullptr>
|
||||
accelerator_inline void convertType(T & out, const T & in) {
|
||||
out = in;
|
||||
}
|
||||
|
||||
template<typename T1,typename T2>
|
||||
accelerator_inline void convertType(Lattice<T1> & out, const Lattice<T2> & in) {
|
||||
auto out_v = out.AcceleratorView(ViewWrite);
|
||||
auto in_v = in.AcceleratorView(ViewRead);
|
||||
|
||||
accelerator_for(ss,out_v.size(),T1::Nsimd(),{
|
||||
convertType(out_v[ss],in_v(ss));
|
||||
});
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// precision-promoted local inner product
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
|
||||
-> Lattice<iScalar<decltype(TensorRemove(innerProductD2(lhs.View()[0],rhs.View()[0])))>>
|
||||
{
|
||||
auto lhs_v = lhs.AcceleratorView(ViewRead);
|
||||
auto rhs_v = rhs.AcceleratorView(ViewRead);
|
||||
|
||||
typedef decltype(TensorRemove(innerProductD2(lhs_v[0],rhs_v[0]))) t_inner;
|
||||
Lattice<iScalar<t_inner>> ret(lhs.Grid());
|
||||
auto ret_v = ret.AcceleratorView(ViewWrite);
|
||||
|
||||
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
|
||||
convertType(ret_v[ss],innerProductD2(lhs_v(ss),rhs_v(ss)));
|
||||
});
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// block routines
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
const Lattice<vobj> &fineData,
|
||||
const std::vector<Lattice<vobj> > &Basis)
|
||||
const Lattice<vobj> &fineData,
|
||||
const VLattice &Basis)
|
||||
{
|
||||
GridBase * fine = fineData.Grid();
|
||||
GridBase * coarse= coarseData.Grid();
|
||||
|
||||
Lattice<CComplex> ip(coarse);
|
||||
Lattice<iScalar<CComplex>> ip(coarse);
|
||||
Lattice<vobj> fineDataRed = fineData;
|
||||
|
||||
// auto fineData_ = fineData.View();
|
||||
auto coarseData_ = coarseData.View();
|
||||
auto ip_ = ip.View();
|
||||
auto coarseData_ = coarseData.AcceleratorView(ViewWrite);
|
||||
auto ip_ = ip.AcceleratorView(ViewReadWrite);
|
||||
for(int v=0;v<nbasis;v++) {
|
||||
blockInnerProduct(ip,Basis[v],fineData);
|
||||
blockInnerProductD(ip,Basis[v],fineDataRed); // ip = <basis|fine>
|
||||
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
|
||||
coalescedWrite(coarseData_[sc](v),ip_(sc));
|
||||
convertType(coarseData_[sc](v),ip_[sc]);
|
||||
});
|
||||
|
||||
// improve numerical stability of projection
|
||||
// |fine> = |fine> - <basis|fine> |basis>
|
||||
ip=-ip;
|
||||
blockZAXPY(fineDataRed,ip,Basis[v],fineDataRed);
|
||||
}
|
||||
}
|
||||
|
||||
@ -166,11 +273,11 @@ inline void blockProject1(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
return;
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||
const Lattice<CComplex> &coarseA,
|
||||
const Lattice<vobj> &fineX,
|
||||
const Lattice<vobj> &fineY)
|
||||
template<class vobj,class vobj2,class CComplex>
|
||||
inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||
const Lattice<CComplex> &coarseA,
|
||||
const Lattice<vobj2> &fineX,
|
||||
const Lattice<vobj> &fineY)
|
||||
{
|
||||
GridBase * fine = fineZ.Grid();
|
||||
GridBase * coarse= coarseA.Grid();
|
||||
@ -182,7 +289,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||
conformable(fineX,fineZ);
|
||||
|
||||
int _ndimension = coarse->_ndimension;
|
||||
|
||||
|
||||
Coordinate block_r (_ndimension);
|
||||
|
||||
// FIXME merge with subdivide checking routine as this is redundant
|
||||
@ -191,29 +298,65 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||
assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]);
|
||||
}
|
||||
|
||||
auto fineZ_ = fineZ.View();
|
||||
auto fineX_ = fineX.View();
|
||||
auto fineY_ = fineY.View();
|
||||
auto coarseA_= coarseA.View();
|
||||
auto fineZ_ = fineZ.AcceleratorView(ViewWrite);
|
||||
auto fineX_ = fineX.AcceleratorView(ViewRead);
|
||||
auto fineY_ = fineY.AcceleratorView(ViewRead);
|
||||
auto coarseA_= coarseA.AcceleratorView(ViewRead);
|
||||
|
||||
accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
|
||||
|
||||
int sc;
|
||||
Coordinate coor_c(_ndimension);
|
||||
Coordinate coor_f(_ndimension);
|
||||
|
||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
int sc;
|
||||
Coordinate coor_c(_ndimension);
|
||||
Coordinate coor_f(_ndimension);
|
||||
|
||||
// z = A x + y
|
||||
coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
|
||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
});
|
||||
// z = A x + y
|
||||
#ifdef __CUDA_ARCH__
|
||||
typename vobj2::tensor_reduced::scalar_object cA;
|
||||
typename vobj::scalar_object cAx;
|
||||
#else
|
||||
typename vobj2::tensor_reduced cA;
|
||||
vobj cAx;
|
||||
#endif
|
||||
convertType(cA,TensorRemove(coarseA_(sc)));
|
||||
auto prod = cA*fineX_(sf);
|
||||
convertType(cAx,prod);
|
||||
coalescedWrite(fineZ_[sf],cAx+fineY_(sf));
|
||||
|
||||
});
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockInnerProductD(Lattice<CComplex> &CoarseInner,
|
||||
const Lattice<vobj> &fineX,
|
||||
const Lattice<vobj> &fineY)
|
||||
{
|
||||
typedef iScalar<decltype(TensorRemove(innerProductD2(vobj(),vobj())))> dotp;
|
||||
|
||||
GridBase *coarse(CoarseInner.Grid());
|
||||
GridBase *fine (fineX.Grid());
|
||||
|
||||
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
|
||||
Lattice<dotp> coarse_inner(coarse);
|
||||
|
||||
auto CoarseInner_ = CoarseInner.AcceleratorView(ViewWrite);
|
||||
auto coarse_inner_ = coarse_inner.AcceleratorView(ViewReadWrite);
|
||||
|
||||
// Precision promotion
|
||||
fine_inner = localInnerProductD(fineX,fineY);
|
||||
blockSum(coarse_inner,fine_inner);
|
||||
accelerator_for(ss, coarse->oSites(), 1, {
|
||||
convertType(CoarseInner_[ss], TensorRemove(coarse_inner_[ss]));
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex> // deprecate
|
||||
inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||
const Lattice<vobj> &fineX,
|
||||
const Lattice<vobj> &fineY)
|
||||
@ -227,8 +370,8 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||
Lattice<dotp> coarse_inner(coarse);
|
||||
|
||||
// Precision promotion?
|
||||
auto CoarseInner_ = CoarseInner.View();
|
||||
auto coarse_inner_ = coarse_inner.View();
|
||||
auto CoarseInner_ = CoarseInner.AcceleratorView(ViewWrite);
|
||||
auto coarse_inner_ = coarse_inner.AcceleratorView(ViewReadWrite);
|
||||
|
||||
fine_inner = localInnerProduct(fineX,fineY);
|
||||
blockSum(coarse_inner,fine_inner);
|
||||
@ -236,6 +379,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||
CoarseInner_[ss] = coarse_inner_[ss];
|
||||
});
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
||||
{
|
||||
@ -248,7 +392,7 @@ inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
||||
// useful in multigrid project;
|
||||
// Generic name : Coarsen?
|
||||
template<class vobj>
|
||||
inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
||||
inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
||||
{
|
||||
GridBase * fine = fineData.Grid();
|
||||
GridBase * coarse= coarseData.Grid();
|
||||
@ -256,42 +400,41 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
||||
subdivides(coarse,fine); // require they map
|
||||
|
||||
int _ndimension = coarse->_ndimension;
|
||||
|
||||
|
||||
Coordinate block_r (_ndimension);
|
||||
|
||||
|
||||
for(int d=0 ; d<_ndimension;d++){
|
||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||
}
|
||||
int blockVol = fine->oSites()/coarse->oSites();
|
||||
|
||||
// Turn this around to loop threaded over sc and interior loop
|
||||
// over sf would thread better
|
||||
auto coarseData_ = coarseData.View();
|
||||
auto fineData_ = fineData.View();
|
||||
auto coarseData_ = coarseData.AcceleratorView(ViewReadWrite);
|
||||
auto fineData_ = fineData.AcceleratorView(ViewRead);
|
||||
|
||||
accelerator_for(sc,coarse->oSites(),1,{
|
||||
|
||||
// One thread per sub block
|
||||
Coordinate coor_c(_ndimension);
|
||||
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
||||
coarseData_[sc]=Zero();
|
||||
// One thread per sub block
|
||||
Coordinate coor_c(_ndimension);
|
||||
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
||||
coarseData_[sc]=Zero();
|
||||
|
||||
for(int sb=0;sb<blockVol;sb++){
|
||||
|
||||
int sf;
|
||||
Coordinate coor_b(_ndimension);
|
||||
Coordinate coor_f(_ndimension);
|
||||
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
|
||||
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
||||
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
||||
for(int sb=0;sb<blockVol;sb++){
|
||||
|
||||
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
|
||||
}
|
||||
int sf;
|
||||
Coordinate coor_b(_ndimension);
|
||||
Coordinate coor_f(_ndimension);
|
||||
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
|
||||
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
||||
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
||||
|
||||
});
|
||||
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
|
||||
}
|
||||
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
template<class vobj>
|
||||
inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vobj> &picked,Coordinate coor)
|
||||
{
|
||||
@ -313,8 +456,8 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis)
|
||||
template<class CComplex,class VLattice>
|
||||
inline void blockOrthonormalize(Lattice<CComplex> &ip,VLattice &Basis)
|
||||
{
|
||||
GridBase *coarse = ip.Grid();
|
||||
GridBase *fine = Basis[0].Grid();
|
||||
@ -322,23 +465,30 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
|
||||
int nbasis = Basis.size() ;
|
||||
|
||||
// checks
|
||||
subdivides(coarse,fine);
|
||||
subdivides(coarse,fine);
|
||||
for(int i=0;i<nbasis;i++){
|
||||
conformable(Basis[i].Grid(),fine);
|
||||
}
|
||||
|
||||
for(int v=0;v<nbasis;v++) {
|
||||
for(int u=0;u<v;u++) {
|
||||
//Inner product & remove component
|
||||
blockInnerProduct(ip,Basis[u],Basis[v]);
|
||||
//Inner product & remove component
|
||||
blockInnerProductD(ip,Basis[u],Basis[v]);
|
||||
ip = -ip;
|
||||
blockZAXPY<vobj,CComplex> (Basis[v],ip,Basis[u],Basis[v]);
|
||||
blockZAXPY(Basis[v],ip,Basis[u],Basis[v]);
|
||||
}
|
||||
blockNormalise(ip,Basis[v]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis) // deprecated inaccurate naming
|
||||
{
|
||||
blockOrthonormalize(ip,Basis);
|
||||
}
|
||||
|
||||
#if 0
|
||||
// TODO: CPU optimized version here
|
||||
template<class vobj,class CComplex,int nbasis>
|
||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
Lattice<vobj> &fineData,
|
||||
@ -383,24 +533,18 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
|
||||
}
|
||||
#else
|
||||
template<class vobj,class CComplex,int nbasis>
|
||||
template<class vobj,class CComplex,int nbasis,class VLattice>
|
||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
Lattice<vobj> &fineData,
|
||||
const std::vector<Lattice<vobj> > &Basis)
|
||||
const VLattice &Basis)
|
||||
{
|
||||
GridBase * fine = fineData.Grid();
|
||||
GridBase * coarse= coarseData.Grid();
|
||||
|
||||
fineData=Zero();
|
||||
for(int i=0;i<nbasis;i++) {
|
||||
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
|
||||
Lattice<CComplex> cip(coarse);
|
||||
auto cip_ = cip.View();
|
||||
auto ip_ = ip.View();
|
||||
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
|
||||
coalescedWrite(cip_[sc], ip_(sc)());
|
||||
});
|
||||
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
|
||||
auto ip_ = ip.AcceleratorView(ViewRead);
|
||||
blockZAXPY(fineData,ip,Basis[i],fineData);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -470,8 +614,8 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
||||
Coordinate rdt = Tg->_rdimensions;
|
||||
Coordinate ist = Tg->_istride;
|
||||
Coordinate ost = Tg->_ostride;
|
||||
auto t_v = To.View();
|
||||
auto f_v = From.View();
|
||||
auto t_v = To.AcceleratorView(ViewWrite);
|
||||
auto f_v = From.AcceleratorView(ViewRead);
|
||||
accelerator_for(idx,Fg->lSites(),1,{
|
||||
sobj s;
|
||||
Coordinate Fcoor(nd);
|
||||
|
@ -38,6 +38,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
@ -48,7 +49,8 @@ inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -341,7 +341,7 @@ class BinaryIO {
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
|
||||
assert(ieee64||ieee32|ieee64big||ieee32big);
|
||||
assert((ieee64+ieee32+ieee64big+ieee32big)==1);
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -301,6 +301,30 @@ struct GaugeSimpleUnmunger {
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct GaugeDoubleStoredMunger{
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
for (int mu = 0; mu < Nds; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct GaugeDoubleStoredUnmunger {
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
for (int mu = 0; mu < Nds; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Gauge3x2munger{
|
||||
void operator() (fobj &in,sobj &out){
|
||||
|
@ -146,7 +146,7 @@ public:
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
// depending on datatype, set up munger;
|
||||
|
224
Grid/parallelIO/OpenQcdIO.h
Normal file
224
Grid/parallelIO/OpenQcdIO.h
Normal file
@ -0,0 +1,224 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/OpenQcdIO.h
|
||||
|
||||
Copyright (C) 2015 - 2020
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
struct OpenQcdHeader : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(OpenQcdHeader,
|
||||
int, Nt,
|
||||
int, Nx,
|
||||
int, Ny,
|
||||
int, Nz,
|
||||
double, plaq);
|
||||
};
|
||||
|
||||
class OpenQcdIO : public BinaryIO {
|
||||
public:
|
||||
static constexpr double normalisationFactor = Nc; // normalisation difference: grid 18, openqcd 6
|
||||
|
||||
static inline int readHeader(std::string file, GridBase* grid, FieldMetaData& field) {
|
||||
OpenQcdHeader header;
|
||||
|
||||
{
|
||||
std::ifstream fin(file, std::ios::in | std::ios::binary);
|
||||
fin.read(reinterpret_cast<char*>(&header), sizeof(OpenQcdHeader));
|
||||
assert(!fin.fail());
|
||||
field.data_start = fin.tellg();
|
||||
fin.close();
|
||||
}
|
||||
|
||||
header.plaq /= normalisationFactor;
|
||||
|
||||
// sanity check (should trigger on endian issues)
|
||||
assert(0 < header.Nt && header.Nt <= 1024);
|
||||
assert(0 < header.Nx && header.Nx <= 1024);
|
||||
assert(0 < header.Ny && header.Ny <= 1024);
|
||||
assert(0 < header.Nz && header.Nz <= 1024);
|
||||
|
||||
field.dimension[0] = header.Nx;
|
||||
field.dimension[1] = header.Ny;
|
||||
field.dimension[2] = header.Nz;
|
||||
field.dimension[3] = header.Nt;
|
||||
|
||||
std::cout << GridLogDebug << "header: " << header << std::endl;
|
||||
std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl;
|
||||
std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl;
|
||||
|
||||
assert(grid->_ndimension == Nd);
|
||||
for(int d = 0; d < Nd; d++)
|
||||
assert(grid->_fdimensions[d] == field.dimension[d]);
|
||||
|
||||
field.plaquette = header.plaq;
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
FieldMetaData& header,
|
||||
std::string file) {
|
||||
typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubleStoredGaugeField;
|
||||
|
||||
assert(Ns == 4 and Nd == 4 and Nc == 3);
|
||||
|
||||
auto grid = dynamic_cast<GridCartesian*>(Umu.Grid());
|
||||
assert(grid != nullptr); assert(grid->_ndimension == Nd);
|
||||
|
||||
uint64_t offset = readHeader(file, Umu.Grid(), header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format("IEEE64"); // they always store little endian double precsision
|
||||
uint32_t nersc_csum, scidac_csuma, scidac_csumb;
|
||||
|
||||
GridCartesian* grid_openqcd = createOpenQcdGrid(grid);
|
||||
GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid);
|
||||
|
||||
typedef DoubleStoredColourMatrixD fobj;
|
||||
typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj;
|
||||
typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word;
|
||||
|
||||
word w = 0;
|
||||
|
||||
std::vector<fobj> iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here
|
||||
std::vector<sobj> scalardata(grid->lSites());
|
||||
|
||||
IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum, scidac_csuma, scidac_csumb);
|
||||
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
|
||||
DoubleStoredGaugeField Umu_ds(grid);
|
||||
|
||||
auto munge = GaugeDoubleStoredMunger<DoubleStoredColourMatrixD, DoubleStoredColourMatrix>();
|
||||
|
||||
Coordinate ldim = grid->LocalDimensions();
|
||||
thread_for(idx_g, grid->lSites(), {
|
||||
Coordinate coor;
|
||||
grid->LocalIndexToLocalCoor(idx_g, coor);
|
||||
|
||||
bool isOdd = grid_rb->CheckerBoard(coor) == Odd;
|
||||
|
||||
if(!isOdd) continue;
|
||||
|
||||
int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir]
|
||||
+ coor[Xdir] * ldim[Ydir] * ldim[Zdir]
|
||||
+ coor[Ydir] * ldim[Zdir]
|
||||
+ coor[Zdir])/2;
|
||||
|
||||
munge(iodata[idx_o], scalardata[idx_g]);
|
||||
});
|
||||
|
||||
grid->Barrier(); timer.Stop();
|
||||
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl;
|
||||
|
||||
timer.Reset(); timer.Start();
|
||||
|
||||
vectorizeFromLexOrdArray(scalardata, Umu_ds);
|
||||
|
||||
grid->Barrier(); timer.Stop();
|
||||
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl;
|
||||
|
||||
timer.Reset(); timer.Start();
|
||||
|
||||
undoDoubleStore(Umu, Umu_ds);
|
||||
|
||||
grid->Barrier(); timer.Stop();
|
||||
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl;
|
||||
|
||||
GaugeStatistics(Umu, clone);
|
||||
|
||||
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
||||
|
||||
// clang-format off
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file
|
||||
<< " plaquette " << clone.plaquette
|
||||
<< " header " << header.plaquette
|
||||
<< " difference " << plaq_diff
|
||||
<< std::endl;
|
||||
// clang-format on
|
||||
|
||||
RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
|
||||
RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code
|
||||
|
||||
if(plaq_diff >= tol)
|
||||
std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
|
||||
assert(plaq_diff < tol);
|
||||
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
std::string file) {
|
||||
std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
private:
|
||||
static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) {
|
||||
// exploit GridCartesian to be able to still use IOobject
|
||||
Coordinate gdim = grid->GlobalDimensions();
|
||||
Coordinate ldim = grid->LocalDimensions();
|
||||
Coordinate pcoor = grid->ThisProcessorCoor();
|
||||
|
||||
// openqcd does rb on the z direction
|
||||
gdim[Zdir] /= 2;
|
||||
ldim[Zdir] /= 2;
|
||||
|
||||
// and has the order T X Y Z (from slowest to fastest)
|
||||
std::swap(gdim[Xdir], gdim[Zdir]);
|
||||
std::swap(ldim[Xdir], ldim[Zdir]);
|
||||
std::swap(pcoor[Xdir], pcoor[Zdir]);
|
||||
|
||||
GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid());
|
||||
ret->_ldimensions = ldim;
|
||||
ret->_processor_coor = pcoor;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void undoDoubleStore(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
|
||||
conformable(Umu.Grid(), Umu_ds.Grid());
|
||||
Lattice<iColourMatrix<vsimd>> U(Umu.Grid());
|
||||
|
||||
// they store T+, T-, X+, X-, Y+, Y-, Z+, Z-
|
||||
for(int mu_g = 0; mu_g < Nd; ++mu_g) {
|
||||
int mu_o = (mu_g + 1) % Nd;
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o)
|
||||
+ Cshift(PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o + 1), mu_g, +1);
|
||||
PokeIndex<LorentzIndex>(Umu, U, mu_g);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
281
Grid/parallelIO/OpenQcdIOChromaReference.h
Normal file
281
Grid/parallelIO/OpenQcdIOChromaReference.h
Normal file
@ -0,0 +1,281 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h
|
||||
|
||||
Copyright (C) 2015 - 2020
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <ios>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <iomanip>
|
||||
#include <mpi.h>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;}
|
||||
#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; }
|
||||
// #undef CHECK
|
||||
// #define CHECK
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
class ParRdr {
|
||||
private:
|
||||
bool const swap;
|
||||
|
||||
MPI_Status status;
|
||||
MPI_File fp;
|
||||
|
||||
int err;
|
||||
|
||||
MPI_Datatype oddSiteType;
|
||||
MPI_Datatype fileViewType;
|
||||
|
||||
GridBase* grid;
|
||||
|
||||
public:
|
||||
ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr)
|
||||
: swap(false)
|
||||
, grid(gridPtr) {
|
||||
err = MPI_File_open(comm, const_cast<char*>(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp);
|
||||
assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
virtual ~ParRdr() { MPI_File_close(&fp); }
|
||||
|
||||
inline void errInfo(int const err, std::string const& func) {
|
||||
static char estring[MPI_MAX_ERROR_STRING];
|
||||
int eclass = -1, len = 0;
|
||||
MPI_Error_class(err, &eclass);
|
||||
MPI_Error_string(err, estring, &len);
|
||||
std::cerr << func << " - Error " << eclass << ": " << estring << std::endl;
|
||||
}
|
||||
|
||||
int readHeader(FieldMetaData& field) {
|
||||
assert((grid->_ndimension == Nd) && (Nd == 4));
|
||||
assert(Nc == 3);
|
||||
|
||||
OpenQcdHeader header;
|
||||
|
||||
readBlock(reinterpret_cast<char*>(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR);
|
||||
|
||||
header.plaq /= 3.; // TODO change this into normalizationfactor
|
||||
|
||||
// sanity check (should trigger on endian issues) TODO remove?
|
||||
assert(0 < header.Nt && header.Nt <= 1024);
|
||||
assert(0 < header.Nx && header.Nx <= 1024);
|
||||
assert(0 < header.Ny && header.Ny <= 1024);
|
||||
assert(0 < header.Nz && header.Nz <= 1024);
|
||||
|
||||
field.dimension[0] = header.Nx;
|
||||
field.dimension[1] = header.Ny;
|
||||
field.dimension[2] = header.Nz;
|
||||
field.dimension[3] = header.Nt;
|
||||
|
||||
for(int d = 0; d < Nd; d++)
|
||||
assert(grid->FullDimensions()[d] == field.dimension[d]);
|
||||
|
||||
field.plaquette = header.plaq;
|
||||
|
||||
field.data_start = sizeof(OpenQcdHeader);
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) {
|
||||
err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status);
|
||||
errInfo(err, "MPI_File_read_at_all");
|
||||
// CHECK_VAR(err)
|
||||
|
||||
int read = -1;
|
||||
MPI_Get_count(&status, datatype, &read);
|
||||
// CHECK_VAR(read)
|
||||
assert(nbytes == (uint64_t)read);
|
||||
assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
void createTypes() {
|
||||
constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd
|
||||
|
||||
err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS);
|
||||
err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS);
|
||||
|
||||
Coordinate const L = grid->GlobalDimensions();
|
||||
Coordinate const l = grid->LocalDimensions();
|
||||
Coordinate const i = grid->ThisProcessorCoor();
|
||||
|
||||
Coordinate sizes({L[2] / 2, L[1], L[0], L[3]});
|
||||
Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]});
|
||||
Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]});
|
||||
|
||||
err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS);
|
||||
err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
void freeTypes() {
|
||||
err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS);
|
||||
err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
bool readGauge(std::vector<ColourMatrixD>& domain_buff, FieldMetaData& meta) {
|
||||
auto hdr_offset = readHeader(meta);
|
||||
CHECK
|
||||
createTypes();
|
||||
err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS);
|
||||
CHECK
|
||||
int const domainSites = grid->lSites();
|
||||
domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd
|
||||
|
||||
// the actual READ
|
||||
constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex
|
||||
constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd
|
||||
constexpr uint64_t max_elems = std::numeric_limits<int>::max(); // int adressable elems: floor is fine
|
||||
uint64_t const n_os = domainSites / 2;
|
||||
|
||||
for(uint64_t os_idx = 0; os_idx < n_os;) {
|
||||
uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx;
|
||||
uint64_t const cm = os_idx * Nd * 2;
|
||||
readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType);
|
||||
os_idx += read_os;
|
||||
}
|
||||
|
||||
CHECK
|
||||
err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL);
|
||||
errInfo(err, "MPI_File_set_view1");
|
||||
assert(err == MPI_SUCCESS);
|
||||
freeTypes();
|
||||
|
||||
std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class OpenQcdIOChromaReference : public BinaryIO {
|
||||
public:
|
||||
template<class vsimd>
|
||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
Grid::FieldMetaData& header,
|
||||
std::string file) {
|
||||
typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubledGaugeField;
|
||||
|
||||
assert(Ns == 4 and Nd == 4 and Nc == 3);
|
||||
|
||||
auto grid = Umu.Grid();
|
||||
|
||||
typedef ColourMatrixD fobj;
|
||||
|
||||
std::vector<fobj> iodata(
|
||||
Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file
|
||||
|
||||
{
|
||||
ParRdr rdr(MPI_COMM_WORLD, file, grid);
|
||||
rdr.readGauge(iodata, header);
|
||||
} // equivalent to using binaryio
|
||||
|
||||
std::vector<iDoubleStoredColourMatrix<typename vsimd::scalar_type>> Umu_ds_scalar(grid->lSites());
|
||||
|
||||
copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging
|
||||
|
||||
DoubledGaugeField Umu_ds(grid);
|
||||
|
||||
vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds);
|
||||
|
||||
redistribute(Umu, Umu_ds); // equivalent to undoDoublestore
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
GaugeStatistics(Umu, clone);
|
||||
|
||||
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
||||
|
||||
// clang-format off
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file
|
||||
<< " plaquette " << clone.plaquette
|
||||
<< " header " << header.plaquette
|
||||
<< " difference " << plaq_diff
|
||||
<< std::endl;
|
||||
// clang-format on
|
||||
|
||||
RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
|
||||
RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code
|
||||
|
||||
if(plaq_diff >= tol)
|
||||
std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
|
||||
assert(plaq_diff < tol);
|
||||
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
template<class vsimd>
|
||||
static inline void redistribute(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
|
||||
Grid::conformable(Umu.Grid(), Umu_ds.Grid());
|
||||
Lattice<iColourMatrix<vsimd>> U(Umu.Grid());
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 2) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 3), 0, +1); PokeIndex<LorentzIndex>(Umu, U, 0);
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 4) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 5), 1, +1); PokeIndex<LorentzIndex>(Umu, U, 1);
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 6) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 7), 2, +1); PokeIndex<LorentzIndex>(Umu, U, 2);
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 0) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 1), 3, +1); PokeIndex<LorentzIndex>(Umu, U, 3);
|
||||
}
|
||||
|
||||
static inline void copyToLatticeObject(std::vector<DoubleStoredColourMatrix>& u_fb,
|
||||
std::vector<ColourMatrixD> const& node_buff,
|
||||
GridBase* grid) {
|
||||
assert(node_buff.size() == Nd * grid->lSites());
|
||||
|
||||
Coordinate const& l = grid->LocalDimensions();
|
||||
|
||||
Coordinate coord(Nd);
|
||||
int& x = coord[0];
|
||||
int& y = coord[1];
|
||||
int& z = coord[2];
|
||||
int& t = coord[3];
|
||||
|
||||
int buff_idx = 0;
|
||||
for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering
|
||||
for(x = 0; x < l[0]; ++x)
|
||||
for(y = 0; y < l[1]; ++y)
|
||||
for(z = 0; z < l[2]; ++z) {
|
||||
if((t + z + y + x) % 2 == 0) continue;
|
||||
|
||||
int local_idx;
|
||||
Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions());
|
||||
for(int mu = 0; mu < 2 * Nd; ++mu)
|
||||
for(int c1 = 0; c1 < Nc; ++c1) {
|
||||
for(int c2 = 0; c2 < Nc; ++c2) {
|
||||
u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2);
|
||||
}
|
||||
}
|
||||
buff_idx += 2 * Nd;
|
||||
}
|
||||
|
||||
assert(node_buff.size() == buff_idx);
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -95,7 +95,8 @@ inline uint64_t cyclecount(void){
|
||||
}
|
||||
#elif defined __x86_64__
|
||||
inline uint64_t cyclecount(void){
|
||||
return __rdtsc();
|
||||
uint64_t ret = __rdtsc();
|
||||
return (uint64_t)ret;
|
||||
}
|
||||
#else
|
||||
|
||||
|
@ -110,15 +110,15 @@ public:
|
||||
#endif
|
||||
accumulator = std::chrono::duration_cast<GridUsecs>(start-start);
|
||||
}
|
||||
GridTime Elapsed(void) {
|
||||
GridTime Elapsed(void) const {
|
||||
assert(running == false);
|
||||
return std::chrono::duration_cast<GridTime>( accumulator );
|
||||
}
|
||||
uint64_t useconds(void){
|
||||
uint64_t useconds(void) const {
|
||||
assert(running == false);
|
||||
return (uint64_t) accumulator.count();
|
||||
}
|
||||
bool isRunning(void){
|
||||
bool isRunning(void) const {
|
||||
return running;
|
||||
}
|
||||
};
|
||||
|
@ -133,23 +133,23 @@ typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
||||
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
|
||||
// LorentzColour
|
||||
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
|
||||
@ -443,16 +443,16 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
|
||||
//////////////////////////////////////////////
|
||||
// Fermion <-> propagator assignements
|
||||
//////////////////////////////////////////////
|
||||
//template <class Prop, class Ferm>
|
||||
template <class Fimpl>
|
||||
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
|
||||
//template <class Prop, class Ferm>
|
||||
template <class Fimpl>
|
||||
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
|
||||
{
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
{
|
||||
pokeColour(pjs, peekColour(fj, i), i, c);
|
||||
}
|
||||
@ -460,16 +460,16 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
|
||||
}
|
||||
}
|
||||
|
||||
//template <class Prop, class Ferm>
|
||||
template <class Fimpl>
|
||||
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
|
||||
//template <class Prop, class Ferm>
|
||||
template <class Fimpl>
|
||||
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
|
||||
{
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
{
|
||||
pokeColour(fj, peekColour(pjs, i, c), i);
|
||||
}
|
||||
|
@ -40,8 +40,8 @@ public:
|
||||
public:
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField &in, FermionField &out);
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
||||
virtual void M (const FermionField &in, FermionField &out);
|
||||
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||
@ -141,7 +141,33 @@ public:
|
||||
Vector<iSinglet<Simd> > MatpInvDag;
|
||||
Vector<iSinglet<Simd> > MatmInvDag;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Conserved current utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
// Virtual can't template
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu);
|
||||
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
|
||||
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
|
||||
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Constructors
|
||||
///////////////////////////////////////////////////////////////
|
||||
CayleyFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
|
@ -41,8 +41,8 @@ public:
|
||||
public:
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField &in, FermionField &out);
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
||||
virtual void M (const FermionField &in, FermionField &out);
|
||||
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||
|
@ -53,8 +53,8 @@ public:
|
||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField& in, FermionField& out);
|
||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
||||
virtual void M (const FermionField& in, FermionField& out);
|
||||
virtual void Mdag (const FermionField& in, FermionField& out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||
|
@ -58,8 +58,8 @@ public:
|
||||
virtual GridBase *GaugeRedBlackGrid(void) =0;
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField &in, FermionField &out)=0;
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
|
||||
virtual void M (const FermionField &in, FermionField &out)=0;
|
||||
virtual void Mdag (const FermionField &in, FermionField &out)=0;
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const FermionField &in, FermionField &out)=0;
|
||||
@ -86,15 +86,14 @@ public:
|
||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||
|
||||
|
||||
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
|
||||
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist)
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist)
|
||||
{
|
||||
FFT theFFT((GridCartesian *) in.Grid());
|
||||
|
||||
@ -148,15 +147,19 @@ public:
|
||||
virtual void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu)=0;
|
||||
unsigned int mu)
|
||||
{assert(0);};
|
||||
virtual void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx)=0;
|
||||
ComplexField &lattice_cmplx)
|
||||
{assert(0);};
|
||||
|
||||
// Only reimplemented in Wilson5D
|
||||
// Default to just a zero correlation function
|
||||
|
@ -38,6 +38,7 @@ public:
|
||||
static const bool isFundamental = Representation::isFundamental;
|
||||
static const int Nhcs = Options::Nhcs;
|
||||
static const bool LsVectorised=false;
|
||||
static const bool isGparity=true;
|
||||
|
||||
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Dimension> > Gimpl;
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
@ -46,7 +47,7 @@ public:
|
||||
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
|
||||
|
||||
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Dimension>, Ns>, Ngp>;
|
||||
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Dimension>, Ns>, Ngp>;
|
||||
template <typename vtype> using iImplPropagator = iMatrix<iMatrix<iMatrix<vtype, Dimension>, Ns>, Ngp>;
|
||||
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhs>, Ngp>;
|
||||
template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhcs>, Ngp>;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>, Ngp>;
|
||||
@ -80,6 +81,7 @@ public:
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class _Spinor>
|
||||
static accelerator_inline void multLink(_Spinor &phi,
|
||||
const SiteDoubledGaugeField &U,
|
||||
@ -191,6 +193,16 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template<class _SpinorField>
|
||||
inline void multLinkField(_SpinorField & out,
|
||||
const DoubledGaugeField &Umu,
|
||||
const _SpinorField & phi,
|
||||
int mu)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template <class ref>
|
||||
static accelerator_inline void loadLinkElement(Simd ®, ref &memory)
|
||||
{
|
||||
|
@ -71,8 +71,8 @@ public:
|
||||
// override multiply; cut number routines if pass dagger argument
|
||||
// and also make interface more uniformly consistent
|
||||
//////////////////////////////////////////////////////////////////
|
||||
RealD M(const FermionField &in, FermionField &out);
|
||||
RealD Mdag(const FermionField &in, FermionField &out);
|
||||
void M(const FermionField &in, FermionField &out);
|
||||
void Mdag(const FermionField &in, FermionField &out);
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// half checkerboard operations
|
||||
@ -185,10 +185,12 @@ public:
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu);
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &srct,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
@ -74,8 +73,8 @@ public:
|
||||
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
||||
|
||||
// full checkerboard operations; leave unimplemented as abstract for now
|
||||
RealD M (const FermionField &in, FermionField &out);
|
||||
RealD Mdag (const FermionField &in, FermionField &out);
|
||||
void M (const FermionField &in, FermionField &out);
|
||||
void Mdag (const FermionField &in, FermionField &out);
|
||||
|
||||
// half checkerboard operations
|
||||
void Meooe (const FermionField &in, FermionField &out);
|
||||
@ -217,15 +216,17 @@ public:
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu);
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -40,6 +40,11 @@ inline void convert(const Fieldi &from,Fieldo &to)
|
||||
to=from;
|
||||
}
|
||||
|
||||
struct MADWFinnerIterCallbackBase{
|
||||
virtual void operator()(const RealD current_resid){}
|
||||
virtual ~MADWFinnerIterCallbackBase(){}
|
||||
};
|
||||
|
||||
template<class Matrixo,class Matrixi,class PVinverter,class SchurSolver, class Guesser>
|
||||
class MADWF
|
||||
{
|
||||
@ -56,24 +61,30 @@ class MADWF
|
||||
|
||||
RealD target_resid;
|
||||
int maxiter;
|
||||
public:
|
||||
|
||||
//operator() is called on "callback" at the end of every inner iteration. This allows for example the adjustment of the inner
|
||||
//tolerance to speed up subsequent iteration
|
||||
MADWFinnerIterCallbackBase* callback;
|
||||
|
||||
public:
|
||||
MADWF(Matrixo &_Mato,
|
||||
Matrixi &_Mati,
|
||||
PVinverter &_PauliVillarsSolvero,
|
||||
Matrixi &_Mati,
|
||||
PVinverter &_PauliVillarsSolvero,
|
||||
SchurSolver &_SchurSolveri,
|
||||
Guesser & _Guesseri,
|
||||
RealD resid,
|
||||
int _maxiter) :
|
||||
int _maxiter,
|
||||
MADWFinnerIterCallbackBase* _callback = NULL) :
|
||||
|
||||
Mato(_Mato),Mati(_Mati),
|
||||
SchurSolveri(_SchurSolveri),
|
||||
PauliVillarsSolvero(_PauliVillarsSolvero),Guesseri(_Guesseri)
|
||||
{
|
||||
target_resid=resid;
|
||||
maxiter =_maxiter;
|
||||
};
|
||||
|
||||
PauliVillarsSolvero(_PauliVillarsSolvero),Guesseri(_Guesseri),
|
||||
callback(_callback)
|
||||
{
|
||||
target_resid=resid;
|
||||
maxiter =_maxiter;
|
||||
};
|
||||
|
||||
void operator() (const FermionFieldo &src4,FermionFieldo &sol5)
|
||||
{
|
||||
std::cout << GridLogMessage<< " ************************************************" << std::endl;
|
||||
@ -177,6 +188,8 @@ class MADWF
|
||||
std::cout << GridLogMessage << "Residual " << i << ": " << resid << std::endl;
|
||||
std::cout << GridLogMessage << "***************************************" <<std::endl;
|
||||
|
||||
if(callback != NULL) (*callback)(resid);
|
||||
|
||||
if (resid < target_resid) {
|
||||
return;
|
||||
}
|
||||
|
@ -56,8 +56,8 @@ public:
|
||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField& in, FermionField& out);
|
||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
||||
virtual void M (const FermionField& in, FermionField& out);
|
||||
virtual void Mdag (const FermionField& in, FermionField& out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||
|
@ -59,7 +59,7 @@ public:
|
||||
{
|
||||
RealD eps = 1.0;
|
||||
|
||||
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
|
||||
// std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
|
||||
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
||||
assert(zdata->n==this->Ls);
|
||||
|
||||
|
@ -47,8 +47,8 @@ public:
|
||||
void M_internal(const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField &in, FermionField &out);
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
||||
virtual void M (const FermionField &in, FermionField &out);
|
||||
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||
|
@ -109,9 +109,8 @@ public:
|
||||
ImportGauge(_Umu);
|
||||
}
|
||||
|
||||
virtual RealD M(const FermionField &in, FermionField &out);
|
||||
virtual RealD Mdag(const FermionField &in, FermionField &out);
|
||||
|
||||
virtual void M(const FermionField &in, FermionField &out);
|
||||
virtual void Mdag(const FermionField &in, FermionField &out);
|
||||
virtual void Mooee(const FermionField &in, FermionField &out);
|
||||
virtual void MooeeDag(const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInv(const FermionField &in, FermionField &out);
|
||||
|
@ -78,8 +78,8 @@ public:
|
||||
// override multiply; cut number routines if pass dagger argument
|
||||
// and also make interface more uniformly consistent
|
||||
//////////////////////////////////////////////////////////////////
|
||||
virtual RealD M(const FermionField &in, FermionField &out);
|
||||
virtual RealD Mdag(const FermionField &in, FermionField &out);
|
||||
virtual void M(const FermionField &in, FermionField &out);
|
||||
virtual void Mdag(const FermionField &in, FermionField &out);
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// half checkerboard operations
|
||||
@ -179,15 +179,17 @@ public:
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu);
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
};
|
||||
|
||||
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
@ -99,8 +98,8 @@ public:
|
||||
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
||||
|
||||
// full checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
||||
virtual void M (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void Mdag (const FermionField &in, FermionField &out){assert(0);};
|
||||
|
||||
// half checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
|
||||
@ -217,25 +216,7 @@ public:
|
||||
|
||||
// Comms buffer
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Conserved current utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
Current curr_type,
|
||||
unsigned int mu);
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
|
||||
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
|
||||
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
|
||||
|
||||
};
|
||||
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
static const int Dimension = Representation::Dimension;
|
||||
static const bool isFundamental = Representation::isFundamental;
|
||||
static const bool LsVectorised=false;
|
||||
static const bool isGparity=false;
|
||||
static const int Nhcs = Options::Nhcs;
|
||||
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
@ -98,8 +99,21 @@ public:
|
||||
{
|
||||
multLink(phi,U,chi,mu);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class _SpinorField>
|
||||
inline void multLinkField(_SpinorField & out,
|
||||
const DoubledGaugeField &Umu,
|
||||
const _SpinorField & phi,
|
||||
int mu)
|
||||
{
|
||||
auto out_v= out.View();
|
||||
auto phi_v= phi.View();
|
||||
auto Umu_v= Umu.View();
|
||||
thread_for(sss,out.Grid()->oSites(),{
|
||||
multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu);
|
||||
});
|
||||
}
|
||||
|
||||
template <class ref>
|
||||
static accelerator_inline void loadLinkElement(Simd ®, ref &memory)
|
||||
{
|
||||
|
@ -66,41 +66,6 @@ public:
|
||||
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Utilities for inserting Wilson conserved current.
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign = false);
|
||||
|
||||
static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign = false);
|
||||
|
||||
static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vPredicate t_mask,
|
||||
bool switch_sign = false);
|
||||
|
||||
static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vPredicate t_mask,
|
||||
bool switch_sign = false);
|
||||
|
||||
private:
|
||||
|
||||
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||
|
@ -120,7 +120,8 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
}
|
||||
}
|
||||
|
||||
virtual RealD M(const FermionField &in, FermionField &out) {
|
||||
virtual void M(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
this->Dhop(in, out, DaggerNo);
|
||||
FermionField tmp(out.Grid());
|
||||
@ -129,11 +130,12 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
ComplexD b(0.0,this->mu[s]);
|
||||
axpbg5y_ssp(tmp,a,in,b,in,s,s);
|
||||
}
|
||||
return axpy_norm(out, 1.0, tmp, out);
|
||||
axpy(out, 1.0, tmp, out);
|
||||
}
|
||||
|
||||
// needed for fast PV
|
||||
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
|
||||
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu)
|
||||
{
|
||||
assert(_mass.size() == _mu.size());
|
||||
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
|
||||
this->mass = _mass;
|
||||
|
@ -323,7 +323,7 @@ void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||
void CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
FermionField Din(psi.Grid());
|
||||
|
||||
@ -335,11 +335,10 @@ RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||
axpby(chi,1.0,1.0,chi,psi);
|
||||
|
||||
M5D(psi,chi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||
void CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
// Under adjoint
|
||||
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
||||
@ -354,7 +353,6 @@ RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||
M5Ddag(psi,chi);
|
||||
// ((b D_W + D_w hop terms +1) on s-diag
|
||||
axpby (chi,1.0,1.0,chi,psi);
|
||||
return norm2(chi);
|
||||
}
|
||||
|
||||
// half checkerboard operations
|
||||
@ -588,6 +586,356 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
|
||||
// this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void CayleyFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
|
||||
{
|
||||
conformable(this->GaugeGrid(), J5q.Grid());
|
||||
conformable(q_in.Grid(), this->FermionGrid());
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
// 4d field
|
||||
int Ls = this->Ls;
|
||||
FermionField psi(this->GaugeGrid());
|
||||
FermionField p_plus (this->GaugeGrid());
|
||||
FermionField p_minus(this->GaugeGrid());
|
||||
FermionField p(this->GaugeGrid());
|
||||
|
||||
ExtractSlice(p_plus , q_in, Ls/2-1 , 0);
|
||||
ExtractSlice(p_minus, q_in, Ls/2 , 0);
|
||||
p_plus = p_plus + G5*p_plus;
|
||||
p_minus= p_minus - G5*p_minus;
|
||||
p=0.5*(p_plus+p_minus);
|
||||
J5q = localInnerProduct(p,p);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void CayleyFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q)
|
||||
{
|
||||
conformable(this->GaugeGrid(), J5q.Grid());
|
||||
conformable(q_in.Grid(), this->FermionGrid());
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
// 4d field
|
||||
int Ls = this->Ls;
|
||||
PropagatorField psi(this->GaugeGrid());
|
||||
PropagatorField p_plus (this->GaugeGrid());
|
||||
PropagatorField p_minus(this->GaugeGrid());
|
||||
PropagatorField p(this->GaugeGrid());
|
||||
|
||||
ExtractSlice(p_plus , q_in, Ls/2-1 , 0);
|
||||
ExtractSlice(p_minus, q_in, Ls/2 , 0);
|
||||
p_plus = p_plus + G5*p_plus;
|
||||
p_minus= p_minus - G5*p_minus;
|
||||
p=0.5*(p_plus+p_minus);
|
||||
J5q = localInnerProduct(p,p);
|
||||
}
|
||||
|
||||
#define Pp(Q) (0.5*(Q+g5*Q))
|
||||
#define Pm(Q) (0.5*(Q-g5*Q))
|
||||
#define Q_4d(Q) (Pm((Q)[0]) + Pp((Q)[Ls-1]))
|
||||
#define TopRowWithSource(Q) (phys_src + (1.0-mass)*Q_4d(Q))
|
||||
|
||||
template <class Impl>
|
||||
void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu)
|
||||
{
|
||||
#ifndef GRID_NVCC
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT,
|
||||
Gamma::Algebra::Gamma5
|
||||
};
|
||||
|
||||
auto UGrid= this->GaugeGrid();
|
||||
auto FGrid= this->FermionGrid();
|
||||
RealD sgn=1.0;
|
||||
if ( curr_type == Current::Axial ) sgn = -1.0;
|
||||
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<PropagatorField> L_Q(Ls,UGrid);
|
||||
std::vector<PropagatorField> R_Q(Ls,UGrid);
|
||||
for(int s=0;s<Ls;s++){
|
||||
ExtractSlice(L_Q[s], q_in_1, s , 0);
|
||||
ExtractSlice(R_Q[s], q_in_2, s , 0);
|
||||
}
|
||||
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
PropagatorField C(UGrid);
|
||||
PropagatorField p5d(UGrid);
|
||||
PropagatorField us_p5d(UGrid);
|
||||
PropagatorField gp5d(UGrid);
|
||||
PropagatorField gus_p5d(UGrid);
|
||||
|
||||
PropagatorField L_TmLsGq0(UGrid);
|
||||
PropagatorField L_TmLsTmp(UGrid);
|
||||
PropagatorField R_TmLsGq0(UGrid);
|
||||
PropagatorField R_TmLsTmp(UGrid);
|
||||
{
|
||||
PropagatorField TermA(UGrid);
|
||||
PropagatorField TermB(UGrid);
|
||||
PropagatorField TermC(UGrid);
|
||||
PropagatorField TermD(UGrid);
|
||||
TermA = (Pp(Q_4d(L_Q)));
|
||||
TermB = (Pm(Q_4d(L_Q)));
|
||||
TermC = (Pm(TopRowWithSource(L_Q)));
|
||||
TermD = (Pp(TopRowWithSource(L_Q)));
|
||||
|
||||
L_TmLsGq0 = (TermD - TermA + TermB);
|
||||
L_TmLsTmp = (TermC - TermB + TermA);
|
||||
|
||||
TermA = (Pp(Q_4d(R_Q)));
|
||||
TermB = (Pm(Q_4d(R_Q)));
|
||||
TermC = (Pm(TopRowWithSource(R_Q)));
|
||||
TermD = (Pp(TopRowWithSource(R_Q)));
|
||||
|
||||
R_TmLsGq0 = (TermD - TermA + TermB);
|
||||
R_TmLsTmp = (TermC - TermB + TermA);
|
||||
}
|
||||
|
||||
std::vector<PropagatorField> R_TmLsGq(Ls,UGrid);
|
||||
std::vector<PropagatorField> L_TmLsGq(Ls,UGrid);
|
||||
for(int s=0;s<Ls;s++){
|
||||
R_TmLsGq[s] = (Pm((R_Q)[(s)]) + Pp((R_Q)[((s)-1+Ls)%Ls]));
|
||||
L_TmLsGq[s] = (Pm((L_Q)[(s)]) + Pp((L_Q)[((s)-1+Ls)%Ls]));
|
||||
}
|
||||
|
||||
Gamma gmu=Gamma(Gmu[mu]);
|
||||
|
||||
q_out = Zero();
|
||||
PropagatorField tmp(UGrid);
|
||||
for(int s=0;s<Ls;s++){
|
||||
|
||||
int sp = (s+1)%Ls;
|
||||
int sr = Ls-1-s;
|
||||
int srp= (sr+1)%Ls;
|
||||
|
||||
// Mobius parameters
|
||||
auto b=this->bs[s];
|
||||
auto c=this->cs[s];
|
||||
auto bpc = 1.0/(b+c); // -0.5 factor in gauge links
|
||||
if (s == 0) {
|
||||
p5d =(b*Pm(L_TmLsGq[Ls-1])+ c*Pp(L_TmLsGq[Ls-1]) + b*Pp(L_TmLsTmp) + c*Pm(L_TmLsTmp ));
|
||||
tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
|
||||
} else if (s == Ls-1) {
|
||||
p5d =(b*Pm(L_TmLsGq0) + c*Pp(L_TmLsGq0 ) + b*Pp(L_TmLsGq[1]) + c*Pm(L_TmLsGq[1]));
|
||||
tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
|
||||
} else {
|
||||
p5d =(b*Pm(L_TmLsGq[sr]) + c*Pp(L_TmLsGq[sr])+ b*Pp(L_TmLsGq[srp])+ c*Pm(L_TmLsGq[srp]));
|
||||
tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp]));
|
||||
}
|
||||
tmp = Cshift(tmp,mu,1);
|
||||
Impl::multLinkField(us_p5d,this->Umu,tmp,mu);
|
||||
|
||||
gp5d=g5*p5d*g5;
|
||||
gus_p5d=gmu*us_p5d;
|
||||
|
||||
C = bpc*(adj(gp5d)*us_p5d);
|
||||
C-= bpc*(adj(gp5d)*gus_p5d);
|
||||
|
||||
if (s == 0) {
|
||||
p5d =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
|
||||
tmp =(b*Pm(L_TmLsGq[Ls-1])+ c*Pp(L_TmLsGq[Ls-1]) + b*Pp(L_TmLsTmp) + c*Pm(L_TmLsTmp ));
|
||||
} else if (s == Ls-1) {
|
||||
p5d =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
|
||||
tmp =(b*Pm(L_TmLsGq0) + c*Pp(L_TmLsGq0 ) + b*Pp(L_TmLsGq[1]) + c*Pm(L_TmLsGq[1]));
|
||||
} else {
|
||||
p5d =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp]));
|
||||
tmp =(b*Pm(L_TmLsGq[sr]) + c*Pp(L_TmLsGq[sr]) + b*Pp(L_TmLsGq[srp])+ c*Pm(L_TmLsGq[srp]));
|
||||
}
|
||||
tmp = Cshift(tmp,mu,1);
|
||||
Impl::multLinkField(us_p5d,this->Umu,tmp,mu);
|
||||
|
||||
gp5d=gmu*p5d;
|
||||
gus_p5d=g5*us_p5d*g5;
|
||||
|
||||
C-= bpc*(adj(gus_p5d)*gp5d);
|
||||
C-= bpc*(adj(gus_p5d)*p5d);
|
||||
|
||||
if (s < Ls/2) q_out += sgn*C;
|
||||
else q_out += C;
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &ph)// Complex phase factor
|
||||
{
|
||||
assert(mu>=0);
|
||||
assert(mu<Nd);
|
||||
|
||||
|
||||
#if 0
|
||||
int tshift = (mu == Nd-1) ? 1 : 0;
|
||||
////////////////////////////////////////////////
|
||||
// SHAMIR CASE
|
||||
////////////////////////////////////////////////
|
||||
int Ls = this->Ls;
|
||||
auto UGrid= this->GaugeGrid();
|
||||
auto FGrid= this->FermionGrid();
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
Gamma gmu=Gamma(Gmu[mu]);
|
||||
|
||||
PropagatorField L_Q(UGrid);
|
||||
PropagatorField R_Q(UGrid);
|
||||
|
||||
PropagatorField tmp(UGrid);
|
||||
PropagatorField Utmp(UGrid);
|
||||
LatticeInteger zz (UGrid); zz=0.0;
|
||||
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
|
||||
for (int s=0;s<Ls;s++) {
|
||||
|
||||
RealD G_s = (curr_type == Current::Axial ) ? ((s < Ls/2) ? -1 : 1) : 1;
|
||||
|
||||
ExtractSlice(R_Q, q_in, s , 0);
|
||||
|
||||
tmp = Cshift(R_Q,mu,1);
|
||||
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
|
||||
tmp = G_s*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
|
||||
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
|
||||
tmp = where((lcoor<=tmax),tmp,zz);
|
||||
L_Q = tmp;
|
||||
|
||||
tmp = R_Q*ph;
|
||||
tmp = Cshift(tmp,mu,-1);
|
||||
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd);// Adjoint link
|
||||
tmp = -G_s*( Utmp + gmu*Utmp );
|
||||
tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time
|
||||
tmp = where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
|
||||
L_Q= L_Q+tmp;
|
||||
|
||||
InsertSlice(L_Q, q_out, s , 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef GRID_NVCC
|
||||
int tshift = (mu == Nd-1) ? 1 : 0;
|
||||
////////////////////////////////////////////////
|
||||
// GENERAL CAYLEY CASE
|
||||
////////////////////////////////////////////////
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT,
|
||||
Gamma::Algebra::Gamma5
|
||||
};
|
||||
Gamma gmu=Gamma(Gmu[mu]);
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
|
||||
int Ls = this->Ls;
|
||||
auto UGrid= this->GaugeGrid();
|
||||
auto FGrid= this->FermionGrid();
|
||||
|
||||
std::vector<PropagatorField> R_Q(Ls,UGrid);
|
||||
PropagatorField L_Q(UGrid);
|
||||
PropagatorField tmp(UGrid);
|
||||
PropagatorField Utmp(UGrid);
|
||||
|
||||
LatticeInteger zz (UGrid); zz=0.0;
|
||||
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
ExtractSlice(R_Q[s], q_in, s , 0);
|
||||
}
|
||||
|
||||
PropagatorField R_TmLsGq0(UGrid);
|
||||
PropagatorField R_TmLsTmp(UGrid);
|
||||
{
|
||||
PropagatorField TermA(UGrid);
|
||||
PropagatorField TermB(UGrid);
|
||||
PropagatorField TermC(UGrid);
|
||||
PropagatorField TermD(UGrid);
|
||||
|
||||
TermA = (Pp(Q_4d(R_Q)));
|
||||
TermB = (Pm(Q_4d(R_Q)));
|
||||
TermC = (Pm(TopRowWithSource(R_Q)));
|
||||
TermD = (Pp(TopRowWithSource(R_Q)));
|
||||
|
||||
R_TmLsGq0 = (TermD - TermA + TermB);
|
||||
R_TmLsTmp = (TermC - TermB + TermA);
|
||||
}
|
||||
|
||||
std::vector<PropagatorField> R_TmLsGq(Ls,UGrid);
|
||||
for(int s=0;s<Ls;s++){
|
||||
R_TmLsGq[s] = (Pm((R_Q)[(s)]) + Pp((R_Q)[((s)-1+Ls)%Ls]));
|
||||
}
|
||||
|
||||
std::vector<RealD> G_s(Ls,1.0);
|
||||
if ( curr_type == Current::Axial ) {
|
||||
for(int s=0;s<Ls/2;s++){
|
||||
G_s[s] = -1.0;
|
||||
}
|
||||
}
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
|
||||
int sp = (s+1)%Ls;
|
||||
int sr = Ls-1-s;
|
||||
int srp= (sr+1)%Ls;
|
||||
|
||||
// Mobius parameters
|
||||
auto b=this->bs[s];
|
||||
auto c=this->cs[s];
|
||||
// auto bpc = G_s[s]*1.0/(b+c); // -0.5 factor in gauge links
|
||||
|
||||
if (s == 0) {
|
||||
tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
|
||||
} else if (s == Ls-1) {
|
||||
tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
|
||||
} else {
|
||||
tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp]));
|
||||
}
|
||||
|
||||
tmp = Cshift(tmp,mu,1);
|
||||
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
|
||||
tmp = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
|
||||
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
|
||||
L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated
|
||||
|
||||
if (s == 0) {
|
||||
tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
|
||||
} else if (s == Ls-1) {
|
||||
tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
|
||||
} else {
|
||||
tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp])+ c*Pm(R_TmLsGq[sp]));
|
||||
}
|
||||
tmp = tmp *ph;
|
||||
tmp = Cshift(tmp,mu,-1);
|
||||
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
|
||||
tmp = -G_s[s]*( Utmp + gmu*Utmp );
|
||||
tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time
|
||||
L_Q += where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
|
||||
|
||||
InsertSlice(L_Q, q_out, s , 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#undef Pp
|
||||
#undef Pm
|
||||
#undef Q_4d
|
||||
#undef TopRowWithSource
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
|
||||
|
@ -94,7 +94,7 @@ void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Ap
|
||||
|
||||
|
||||
template<class Impl>
|
||||
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||
void ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
@ -116,15 +116,14 @@ RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, F
|
||||
}
|
||||
sign=-sign;
|
||||
}
|
||||
return norm2(chi);
|
||||
}
|
||||
template<class Impl>
|
||||
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||
void ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
|
||||
// The rest of matrix is symmetric.
|
||||
// Can ignore "dag"
|
||||
return M(psi,chi);
|
||||
M(psi,chi);
|
||||
}
|
||||
template<class Impl>
|
||||
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||
|
@ -89,7 +89,7 @@ void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionFiel
|
||||
/*****************************************************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
void DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
FermionField Din(psi.Grid());
|
||||
|
||||
@ -97,11 +97,10 @@ RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
this->DW(Din, chi, DaggerNo);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
this->M5D(psi, chi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
void DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
FermionField Din(psi.Grid());
|
||||
|
||||
@ -109,7 +108,6 @@ RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& c
|
||||
this->MeooeDag5D(Din, chi);
|
||||
this->M5Ddag(psi, chi);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
|
@ -548,21 +548,24 @@ void ImprovedStaggeredFermion5D<Impl>::MdirAll(const FermionField &in, std::vect
|
||||
assert(0);
|
||||
}
|
||||
template <class Impl>
|
||||
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerNo);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
axpy(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
RealD ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerYes);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
axpy(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
@ -570,7 +573,8 @@ void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionFiel
|
||||
}
|
||||
}
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
@ -579,27 +583,30 @@ void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionF
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
typename FermionField::scalar_type scal(mass);
|
||||
out = scal * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Mooee(in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
out = (1.0 / (mass)) * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
|
||||
FermionField &out) {
|
||||
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MooeeInv(in, out);
|
||||
}
|
||||
@ -611,6 +618,7 @@ template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu)
|
||||
{
|
||||
@ -620,11 +628,12 @@ void ImprovedStaggeredFermion5D<Impl>::ContractConservedCurrent(PropagatorField
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx)
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx)
|
||||
{
|
||||
assert(0);
|
||||
|
||||
|
@ -171,21 +171,24 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
|
||||
/////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerNo);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
axpy(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerYes);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
axpy(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
@ -193,7 +196,8 @@ void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField
|
||||
}
|
||||
}
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
@ -202,27 +206,30 @@ void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionFie
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
typename FermionField::scalar_type scal(mass);
|
||||
out = scal * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Mooee(in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
out = (1.0 / (mass)) * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
|
||||
FermionField &out) {
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MooeeInv(in, out);
|
||||
}
|
||||
@ -234,7 +241,8 @@ void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
GaugeField & mat,
|
||||
const FermionField &A, const FermionField &B, int dag) {
|
||||
const FermionField &A, const FermionField &B, int dag)
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
Compressor compressor;
|
||||
@ -284,8 +292,8 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||
{
|
||||
conformable(U.Grid(), _grid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
conformable(U.Grid(), mat.Grid());
|
||||
@ -296,8 +304,8 @@ void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionFie
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||
{
|
||||
conformable(U.Grid(), _cbgrid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
conformable(U.Grid(), mat.Grid());
|
||||
@ -310,8 +318,8 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionF
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||
{
|
||||
conformable(U.Grid(), _cbgrid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
conformable(U.Grid(), mat.Grid());
|
||||
@ -600,6 +608,7 @@ template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu)
|
||||
{
|
||||
@ -609,6 +618,7 @@ void ImprovedStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
|
@ -166,7 +166,7 @@ void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& c
|
||||
/*****************************************************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
void MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
FermionField Din(psi.Grid());
|
||||
|
||||
@ -174,11 +174,10 @@ RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
this->DW(Din, chi, DaggerNo);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
this->M5D(psi, chi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
void MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
FermionField Din(psi.Grid());
|
||||
|
||||
@ -186,7 +185,6 @@ RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
this->MeooeDag5D(Din, chi);
|
||||
this->M5Ddag(psi, chi);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
|
@ -269,16 +269,14 @@ void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, Fermi
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
|
||||
void PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
|
||||
{
|
||||
M_internal(in,out,DaggerNo);
|
||||
return norm2(out);
|
||||
}
|
||||
template<class Impl>
|
||||
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
|
||||
void PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
|
||||
{
|
||||
M_internal(in,out,DaggerYes);
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
|
@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// *NOT* EO
|
||||
template <class Impl>
|
||||
RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||
void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||
{
|
||||
FermionField temp(out.Grid());
|
||||
|
||||
@ -47,11 +47,10 @@ RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||
Mooee(in, temp);
|
||||
|
||||
out += temp;
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||
void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
FermionField temp(out.Grid());
|
||||
|
||||
@ -63,7 +62,6 @@ RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||
MooeeDag(in, temp);
|
||||
|
||||
out += temp;
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@ -132,14 +130,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
||||
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
||||
|
||||
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
|
||||
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
|
||||
pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
|
||||
pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
||||
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
|
||||
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
|
||||
pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
|
||||
pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -861,7 +861,6 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
|
||||
* Conserved current utilities for Wilson fermions, for contracting propagators
|
||||
* to make a conserved current sink or inserting the conserved current
|
||||
* sequentially.
|
||||
******************************************************************************/
|
||||
|
||||
// Helper macro to reverse Simd vector. Fixme: slow, generic implementation.
|
||||
#define REVERSE_LS(qSite, qSiteRev, Nsimd) \
|
||||
@ -877,220 +876,10 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
|
||||
merge(qSiteRev, qSiteVec); \
|
||||
}
|
||||
|
||||
// psi = chiralProjectPlus(Result_s[Ls/2-1]);
|
||||
// psi+= chiralProjectMinus(Result_s[Ls/2]);
|
||||
// PJ5q+=localInnerProduct(psi,psi);
|
||||
|
||||
template<class vobj>
|
||||
Lattice<vobj> spProj5p(const Lattice<vobj> & in)
|
||||
{
|
||||
GridBase *grid=in.Grid();
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
Lattice<vobj> ret(grid);
|
||||
auto ret_v = ret.View();
|
||||
auto in_v = in.View();
|
||||
thread_for(ss,grid->oSites(),{
|
||||
ret_v[ss] = in_v[ss] + G5*in_v[ss];
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
template<class vobj>
|
||||
Lattice<vobj> spProj5m(const Lattice<vobj> & in)
|
||||
{
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
GridBase *grid=in.Grid();
|
||||
Lattice<vobj> ret(grid);
|
||||
auto ret_v = ret.View();
|
||||
auto in_v = in.View();
|
||||
thread_for(ss,grid->oSites(),{
|
||||
ret_v[ss] = in_v[ss] - G5*in_v[ss];
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
|
||||
{
|
||||
conformable(GaugeGrid(), J5q.Grid());
|
||||
conformable(q_in.Grid(), FermionGrid());
|
||||
|
||||
// 4d field
|
||||
int Ls = this->Ls;
|
||||
FermionField psi(GaugeGrid());
|
||||
FermionField p_plus (GaugeGrid());
|
||||
FermionField p_minus(GaugeGrid());
|
||||
FermionField p(GaugeGrid());
|
||||
|
||||
ExtractSlice(p_plus , q_in, Ls/2 , 0);
|
||||
ExtractSlice(p_minus, q_in, Ls/2-1 , 0);
|
||||
p_plus = spProj5p(p_plus );
|
||||
p_minus= spProj5m(p_minus);
|
||||
p=p_plus+p_minus;
|
||||
J5q = localInnerProduct(p,p);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q)
|
||||
{
|
||||
conformable(GaugeGrid(), J5q.Grid());
|
||||
conformable(q_in.Grid(), FermionGrid());
|
||||
|
||||
// 4d field
|
||||
int Ls = this->Ls;
|
||||
PropagatorField psi(GaugeGrid());
|
||||
PropagatorField p_plus (GaugeGrid());
|
||||
PropagatorField p_minus(GaugeGrid());
|
||||
PropagatorField p(GaugeGrid());
|
||||
|
||||
ExtractSlice(p_plus , q_in, Ls/2 , 0);
|
||||
ExtractSlice(p_minus, q_in, Ls/2-1 , 0);
|
||||
p_plus = spProj5p(p_plus );
|
||||
p_minus= spProj5m(p_minus);
|
||||
p=p_plus+p_minus;
|
||||
J5q = localInnerProduct(p,p);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
Current curr_type,
|
||||
unsigned int mu)
|
||||
{
|
||||
conformable(q_in_1.Grid(), FermionGrid());
|
||||
conformable(q_in_1.Grid(), q_in_2.Grid());
|
||||
conformable(_FourDimGrid, q_out.Grid());
|
||||
|
||||
PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid());
|
||||
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
|
||||
q_out = Zero();
|
||||
|
||||
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
|
||||
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
|
||||
tmp1 = Cshift(q_in_1, mu + 1, 1);
|
||||
tmp2 = Cshift(q_in_2, mu + 1, 1);
|
||||
auto q_in_1_v = q_in_1.View();
|
||||
auto q_in_2_v = q_in_2.View();
|
||||
auto tmp1_v = tmp1.View();
|
||||
auto tmp2_v = tmp2.View();
|
||||
auto q_out_v = q_out.View();
|
||||
auto Umu_v = Umu.View();
|
||||
thread_for(sU, Umu.Grid()->oSites(),{
|
||||
|
||||
unsigned int sF1 = sU * LLs;
|
||||
unsigned int sF2 = (sU + 1) * LLs - 1;
|
||||
|
||||
for (unsigned int s = 0; s < LLs; ++s)
|
||||
{
|
||||
bool axial_sign = ((curr_type == Current::Axial) && \
|
||||
(s < (LLs / 2)));
|
||||
SitePropagator qSite2, qmuSite2;
|
||||
|
||||
// If vectorised in 5th dimension, reverse q2 vector to match up
|
||||
// sites correctly.
|
||||
if (Impl::LsVectorised)
|
||||
{
|
||||
REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
|
||||
REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
|
||||
}
|
||||
else
|
||||
{
|
||||
qSite2 = q_in_2_v[sF2];
|
||||
qmuSite2 = tmp2_v[sF2];
|
||||
}
|
||||
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
|
||||
qSite2,
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, axial_sign);
|
||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
|
||||
qmuSite2,
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, axial_sign);
|
||||
sF1++;
|
||||
sF2--;
|
||||
}
|
||||
});
|
||||
}
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx)
|
||||
{
|
||||
conformable(q_in.Grid(), FermionGrid());
|
||||
conformable(q_in.Grid(), q_out.Grid());
|
||||
PropagatorField tmp(GaugeGrid()),tmp2(GaugeGrid());
|
||||
unsigned int tshift = (mu == Tp) ? 1 : 0;
|
||||
unsigned int LLs = q_in.Grid()->_rdimensions[0];
|
||||
unsigned int LLt = GridDefaultLatt()[Tp];
|
||||
|
||||
q_out = Zero();
|
||||
LatticeInteger coords(_FourDimGrid);
|
||||
LatticeCoordinate(coords, Tp);
|
||||
|
||||
auto q_out_v = q_out.View();
|
||||
auto tmp2_v = tmp2.View();
|
||||
auto coords_v= coords.View();
|
||||
auto Umu_v = Umu.View();
|
||||
for (unsigned int s = 0; s < LLs; ++s)
|
||||
{
|
||||
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
||||
bool tadpole_sign = (curr_type == Current::Tadpole);
|
||||
bool switch_sgn = tadpole_sign || axial_sign;
|
||||
|
||||
|
||||
//forward direction: Need q(x + mu, s)*A(x)
|
||||
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
|
||||
tmp = Cshift(tmp2, mu, 1); //q(x+mu,s)
|
||||
tmp2 = tmp*lattice_cmplx; //q(x+mu,s)*A(x)
|
||||
|
||||
thread_for(sU, Umu.Grid()->oSites(),{
|
||||
// Compute the sequential conserved current insertion only if our simd
|
||||
// object contains a timeslice we need.
|
||||
vPredicate t_mask;
|
||||
t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax));
|
||||
Integer timeSlices = Reduce(t_mask());
|
||||
|
||||
if (timeSlices > 0)
|
||||
{
|
||||
unsigned int sF = sU * LLs + s;
|
||||
Kernels::SeqConservedCurrentSiteFwd(tmp2_v[sU],
|
||||
q_out_v[sF], Umu_v, sU,
|
||||
mu, t_mask, switch_sgn);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
//backward direction: Need q(x - mu, s)*A(x-mu)
|
||||
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
|
||||
tmp = lattice_cmplx*tmp2; //q(x,s)*A(x)
|
||||
tmp2 = Cshift(tmp, mu, -1); //q(x-mu,s)*A(x-mu,s)
|
||||
|
||||
thread_for(sU, Umu.Grid()->oSites(),
|
||||
{
|
||||
vPredicate t_mask;
|
||||
t_mask()= ((coords_v[sU] >= (tmin + tshift)) && (coords_v[sU] <= (tmax + tshift)));
|
||||
|
||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||
unsigned int t0 = 0;
|
||||
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
|
||||
|
||||
Integer timeSlices = Reduce(t_mask());
|
||||
|
||||
if (timeSlices > 0) {
|
||||
unsigned int sF = sU * LLs + s;
|
||||
Kernels::SeqConservedCurrentSiteBwd(tmp2_v[sU],
|
||||
q_out_v[sF], Umu_v, sU,
|
||||
mu, t_mask, axial_sign);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -102,21 +102,24 @@ void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
/////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerNo);
|
||||
return axpy_norm(out, diag_mass, in, out);
|
||||
axpy(out, diag_mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerYes);
|
||||
return axpy_norm(out, diag_mass, in, out);
|
||||
axpy(out, diag_mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
@ -125,7 +128,8 @@ void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
@ -134,26 +138,30 @@ void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
typename FermionField::scalar_type scal(diag_mass);
|
||||
out = scal * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Mooee(in, out);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
out = (1.0/(diag_mass))*in;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
|
||||
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MooeeInv(in,out);
|
||||
}
|
||||
@ -249,7 +257,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||
{
|
||||
conformable(U.Grid(), _grid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
conformable(U.Grid(), mat.Grid());
|
||||
@ -260,7 +269,8 @@ void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, cons
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||
{
|
||||
conformable(U.Grid(), _cbgrid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
//conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
|
||||
@ -274,7 +284,8 @@ void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, co
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||
{
|
||||
conformable(U.Grid(), _cbgrid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
//conformable(U.Grid(), mat.Grid());
|
||||
@ -287,7 +298,8 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
conformable(in.Grid(), _grid); // verifies full grid
|
||||
conformable(in.Grid(), out.Grid());
|
||||
|
||||
@ -297,7 +309,8 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -308,7 +321,8 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
|
||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -386,7 +400,8 @@ template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
Compressor compressor(dag);
|
||||
@ -436,7 +451,8 @@ template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
Compressor compressor(dag);
|
||||
st.HaloExchange(in, compressor);
|
||||
@ -459,6 +475,7 @@ template <class Impl>
|
||||
void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu)
|
||||
{
|
||||
@ -466,6 +483,7 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
conformable(_grid, q_in_1.Grid());
|
||||
conformable(_grid, q_in_2.Grid());
|
||||
conformable(_grid, q_out.Grid());
|
||||
#if 0
|
||||
PropagatorField tmp1(_grid), tmp2(_grid);
|
||||
q_out = Zero();
|
||||
|
||||
@ -489,12 +507,15 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu);
|
||||
});
|
||||
#else
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
@ -503,6 +524,7 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
{
|
||||
conformable(_grid, q_in.Grid());
|
||||
conformable(_grid, q_out.Grid());
|
||||
#if 0
|
||||
|
||||
// Lattice<iSinglet<Simd>> ph(_grid), coor(_grid);
|
||||
Complex i(0.0,1.0);
|
||||
@ -556,6 +578,8 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
Umu_v, sU, mu, t_mask);
|
||||
}
|
||||
});
|
||||
#else
|
||||
#endif
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -444,19 +444,19 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
||||
#ifndef GRID_NVCC
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;}
|
||||
#endif
|
||||
} else if( interior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
|
||||
#ifndef GRID_NVCC
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
||||
#ifndef GRID_NVCC
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
|
||||
#endif
|
||||
}
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
@ -493,131 +493,5 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Conserved current utilities for Wilson fermions, for contracting propagators
|
||||
* to make a conserved current sink or inserting the conserved current
|
||||
* sequentially. Common to both 4D and 5D.
|
||||
******************************************************************************/
|
||||
// N.B. Functions below assume a -1/2 factor within U.
|
||||
#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr))
|
||||
#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr))
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: ContractConservedCurrentSiteFwd
|
||||
* Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in_1 shifted in +ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result, tmp;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
|
||||
Impl::multLink(tmp, U[sU], q_in_1, mu);
|
||||
|
||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
|
||||
|
||||
if (switch_sign) {
|
||||
q_out -= result;
|
||||
} else {
|
||||
q_out += result;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: ContractConservedCurrentSiteBwd
|
||||
* Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in_2 shifted in +ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result, tmp;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
|
||||
Impl::multLink(tmp, U[sU], q_in_1, mu + Nd);
|
||||
|
||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
|
||||
if (switch_sign) {
|
||||
q_out += result;
|
||||
} else {
|
||||
q_out -= result;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: SeqConservedCurrentSiteFwd
|
||||
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in shifted in +ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vPredicate t_mask,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result;
|
||||
|
||||
Impl::multLink(result, U[sU], q_in, mu);
|
||||
result = WilsonCurrentFwd(result, mu);
|
||||
|
||||
// Zero any unwanted timeslice entries.
|
||||
result = predicatedWhere(t_mask, result, 0.*result);
|
||||
|
||||
if (switch_sign) {
|
||||
q_out -= result;
|
||||
} else {
|
||||
q_out += result;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: SeqConservedCurrentSiteFwd
|
||||
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in shifted in -ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vPredicate t_mask,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result;
|
||||
Impl::multLink(result, U[sU], q_in, mu + Nd);
|
||||
result = WilsonCurrentBwd(result, mu);
|
||||
|
||||
// Zero any unwanted timeslice entries.
|
||||
result = predicatedWhere(t_mask, result, 0.*result);
|
||||
|
||||
if (switch_sign) {
|
||||
q_out += result;
|
||||
} else {
|
||||
q_out -= result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -1,44 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h>
|
||||
|
||||
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h>
|
||||
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dgpu.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class CayleyFermion5D<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -0,0 +1 @@
|
||||
../CayleyFermion5DInstantiation.cc.master
|
@ -1,38 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class ContinuedFractionFermion5D<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
@ -1,44 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class DomainWallEOFAFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../DomainWallEOFAFermionInstantiation.cc.master
|
@ -1,44 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class MobiusEOFAFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../MobiusEOFAFermionInstantiation.cc.master
|
@ -1,39 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class PartialFractionFermion5D<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -0,0 +1 @@
|
||||
../PartialFractionFermion5DInstantiation.cc.master
|
@ -1,40 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/qcd/spin/Dirac.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class WilsonCloverFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../WilsonCloverFermionInstantiation.cc.master
|
@ -1,40 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class WilsonFermion5D<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -0,0 +1 @@
|
||||
../WilsonFermion5DInstantiation.cc.master
|
@ -1,40 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class WilsonFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -0,0 +1 @@
|
||||
../WilsonFermionInstantiation.cc.master
|
@ -1,74 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandGparityImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Move these
|
||||
|
||||
#include "impl.h"
|
||||
|
||||
// G-parity requires more specialised implementation.
|
||||
template <>
|
||||
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
template <>
|
||||
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int mu,
|
||||
unsigned int sU,
|
||||
bool switch_sign)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
HAND_SPECIALISE_GPARITY(IMPLEMENTATION);
|
||||
|
||||
|
||||
template class WilsonKernels<IMPLEMENTATION>;
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -0,0 +1 @@
|
||||
../WilsonKernelsInstantiationGparity.cc.master
|
@ -1,37 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/WilsonTMFermionImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class WilsonTMFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../WilsonTMFermionInstantiation.cc.master
|
@ -1,44 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h>
|
||||
|
||||
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h>
|
||||
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dgpu.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class CayleyFermion5D<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -0,0 +1 @@
|
||||
../CayleyFermion5DInstantiation.cc.master
|
@ -1,38 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class ContinuedFractionFermion5D<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
@ -1,44 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class DomainWallEOFAFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../DomainWallEOFAFermionInstantiation.cc.master
|
@ -1,44 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class MobiusEOFAFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../MobiusEOFAFermionInstantiation.cc.master
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user