mirror of https://github.com/paboyle/Grid.git synced 2025-06-15 14:27:06 +01:00

Compare commits


83 Commits

Author SHA1 Message Date
49b934310b resilient I/O fix 2018-11-27 20:17:09 +00:00
01e8cf5017 Merge branch 'develop' into feature/resilient-io 2018-11-27 19:09:59 +00:00
12f4499502 HDF5 serialiser fix 2018-11-27 19:09:50 +00:00
05aec72887 Hadrons: application parameter for resilient I/O 2018-11-27 18:46:43 +00:00
136d3802cb binary parallel IO can do read tests and eventually re-write in case of failure 2018-11-27 18:38:24 +00:00
a4c55406ed checksummed HDF5 IO 2018-11-27 17:43:19 +00:00
c7f33ca2a8 Revert "Hadrons: A2A vector write can fail and retry"
This reverts commit 10fc263675.
2018-11-27 17:27:26 +00:00
0e3035c51d Revert "optional non-fatal checksum fail in Lime lattice read (with error codes)"
This reverts commit bccfd4cbb3.
2018-11-27 17:27:20 +00:00
10fc263675 Hadrons: A2A vector write can fail and retry 2018-11-26 19:47:03 +00:00
bccfd4cbb3 optional non-fatal checksum fail in Lime lattice read (with error codes) 2018-11-26 19:45:51 +00:00
0b50d4a328 log time fix 2018-11-23 15:51:27 +00:00
e232257cb6 Hadrons: A2AAslashVector modul cleaning and renaming 2018-11-22 19:43:49 +00:00
09451b5e48 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-11-22 15:45:24 +00:00
6364aa8acf Merge branch 'feature/contractor' into develop 2018-11-22 15:44:46 +00:00
b9e84ecab7 Hadrons: minor code cleaning 2018-11-22 15:44:30 +00:00
41032fef44 Optional RW mode for Hdf5Reader 2018-11-21 18:36:50 +00:00
d77bc88170 Optional support for faster CRC32C checksum through Intel IPP 2018-11-19 17:21:53 +00:00
494b3c9e57 Hadrons: contractor more IO fix 2018-11-19 16:26:53 +00:00
2ba19a9e07 Hadrons: contractor IO fix 2018-11-19 16:17:51 +00:00
5d7cc29eaf Hadrons: contractor token @traj@ for trajectory number in input file 2018-11-19 16:04:01 +00:00
f22a27d7f9 Hadrons: contractor trajectory loop and file output 2018-11-19 15:45:04 +00:00
33a0bbb17b Const correctness 2018-11-19 11:27:57 +00:00
f592ec8baa Hadrons: contractor performance fix 2018-11-16 20:59:49 +00:00
8b007b5c24 Hadrons: remove the use of OpenMP reductions 2018-11-16 20:00:29 +00:00
9bb170576d Merge pull request #177 from guelpers/develop
Hadrons module to electrify a gauge
2018-11-14 16:04:09 +00:00
a7e3977b75 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-13 14:56:23 +00:00
995f20e45d Hadrons: some renamings 2018-11-13 14:54:48 +00:00
d058b4e681 Merge branch 'feature/seqA2A' into develop 2018-11-13 13:27:24 +00:00
8e0d2f3402 Hadrons: support for twisted boundary conditions 2018-11-12 17:16:18 +00:00
2ac57370f1 Hadrons: contractor translation average normalisation 2018-11-12 16:04:35 +00:00
344e832a4e Hadrons: contractor faster transpose and finer timings 2018-11-12 15:59:54 +00:00
cfe281f1a4 Hadrons: diskvectors measure hash performance in debug output 2018-11-12 15:59:11 +00:00
f5422c7334 Hadrons: more contractor instrumentation 2018-11-09 16:23:53 +00:00
68c76a410d Hadrons: more contractor improvements 2018-11-08 19:24:29 +00:00
69b6ba0a73 Hadrons: contractor fixes and improvements 2018-11-08 18:46:28 +00:00
65349b07a7 Hadrons: simpler A2A perf functions 2018-11-08 18:44:44 +00:00
7cd9914f0e Hadrons: automatically resize output in MKL A2A matrix kernels 2018-11-08 17:40:57 +00:00
f3f24b3017 Optional Twisted BC's added, in "DoubleStore" for WilsonImpl.
Untested but doesn't affect answers when twists are all zero. The zero is the default behaviour
for ImplParams.
2018-11-08 12:55:25 +00:00
8ef4657805 Merge remote-tracking branch 'upstream/develop' into feature/seqA2A 2018-11-08 09:00:06 +00:00
78c1086f8b Hadrons: sequential Aslash insertion and propagator on A2A vector 2018-11-08 08:58:09 +00:00
68c13045d6 Added a test for Felix and Michael to look at 2018-11-07 23:40:15 +00:00
e9b6f58fdc Allow shrinking machine in orthog direction for extract slice local 2018-11-07 23:39:18 +00:00
839605c45c Verbose reduce 2018-11-07 23:38:46 +00:00
1ff1422e07 Hadrons: contractor lighter output 2018-11-07 20:02:53 +00:00
32376f0437 Hadrons: contractor performances 2018-11-07 19:59:11 +00:00
0c6e581336 Hadrons: first stab at general contraction code, needs serious testing 2018-11-07 19:16:55 +00:00
e0a79a5bbf Hadrons: PR#177: Electrify gauge: Single Precision fix 2018-11-07 15:01:22 +00:00
4c016cc1a4 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-07 14:03:12 +00:00
2205b1e63e Add CXX to grid-config 2018-11-07 13:32:46 +00:00
6f421c7a6f Block solver in the SchurRedBlack plus timing report cleaner 2018-11-07 12:26:56 +00:00
b62b9ac214 Patch to broken assertion 2018-11-06 22:18:17 +00:00
88d9922e4f Hadrons: fast A2A matrix contraction kernels 2018-11-06 19:49:09 +00:00
9734e3ee58 Hadrons: (somewhat) faster build 2018-11-06 19:47:41 +00:00
8c3a599148 Block solver test 2018-11-06 16:44:58 +00:00
4a47b11876 Block CG improvements to develop 2018-11-06 12:49:05 +00:00
f1382cf81d Merge remote-tracking branch 'upstream/develop' into develop 2018-11-06 10:29:52 +00:00
85699daef2 Hadrons: Module to electrify a gauge field 2018-11-06 10:27:18 +00:00
1651111d18 Hadrons: final, portable form of the contractor benchmark 2018-11-05 21:29:13 +00:00
1ed4ea344d Merge branch 'develop' into feature/contractor 2018-11-05 11:42:02 +00:00
8f514ae550 Hadrons: Lanczos 32bit IO 2018-11-05 11:41:10 +00:00
4a7415e83c Hadrons: contractor benchmark update 2018-10-23 21:00:54 +01:00
0ffcfea724 Hadrons: contractor benchmark 2018-10-23 17:08:16 +01:00
febe41cc1d Hadrons: improvement on PR #176 2018-10-23 12:48:15 +01:00
62173395b8 Merge pull request #176 from guelpers/develop
Hadrons: full volume noise source for A2A
2018-10-23 12:29:35 +01:00
b48611b80f Merge branch 'develop' into feature/contractor 2018-10-22 18:27:18 +01:00
6b559d68aa Hadrons: eigenpack converter can do test reads 2018-10-22 11:10:18 +01:00
1982cc58dd Hadrons: A2A vectors I/O filename fix 2018-10-21 01:20:05 +01:00
2e2e5ce596 SciDAC I/O print data checksums 2018-10-19 20:36:32 +01:00
7d84dca8e9 Merge branch 'develop' into feature/contractor 2018-10-18 23:46:58 +01:00
2d3916418e Hadrons: more precision fix 2018-10-18 23:45:13 +01:00
21304e2139 Hadrons: fix to allow single-prec build again 2018-10-18 19:58:50 +01:00
7b850eb48b Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-18 19:46:25 +01:00
a3ace57e01 Hadrons copyright update 2018-10-18 19:46:11 +01:00
b1c3cbe35e Hadrons: A2A vectors I/O 2018-10-18 19:44:58 +01:00
f31d6bfec2 Hadrons: contractor cleaning and better error check 2018-10-18 17:50:35 +01:00
a7cfa26901 Hadrons: reverse A2A matrix load for better DiskVector cache reuse 2018-10-18 17:50:16 +01:00
f333f3e575 Hadrons: DiskVector save-on-eviction and faster CRC32 for Eigen matrices 2018-10-18 17:48:25 +01:00
2b4e253473 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-17 20:28:20 +01:00
0ba3d469c7 Benchmark IO in single and double precision 2018-10-17 20:27:34 +01:00
f709329d96 Hadrons: first version of a contractor utility 2018-10-17 20:26:48 +01:00
f05b25dae4 Hadrons: A2AMatrix load 2018-10-17 20:26:26 +01:00
3e1d268fa3 Hadrons: DiskVector optimisation 2018-10-17 20:25:32 +01:00
109c74bed8 Hadrons: full volume noise source for A2A 2018-10-16 14:56:12 +01:00
84 changed files with 4062 additions and 695 deletions

View File

@@ -380,6 +380,12 @@ namespace Grid {
   template<class Field> class OperatorFunction {
   public:
     virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
+    virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
+      assert(in.size()==out.size());
+      for(int k=0;k<in.size();k++){
+        (*this)(Linop,in[k],out[k]);
+      }
+    };
   };

   template<class Field> class LinearFunction {
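The overload added above gives every OperatorFunction a multi right-hand-side entry point: the base-class default simply loops the single-field solve over the vector, while block solvers (see BlockConjugateGradient below) override it with a genuine block algorithm. A minimal usage sketch, assuming a Grid-style setup; the names FGrid, HermOp and Nrhs are illustrative placeholders, not part of the patch:

    const int Nrhs = 4;                        // illustrative block size
    LatticeFermion proto(FGrid); proto = zero; // prototype field used to size the vectors
    std::vector<LatticeFermion> src(Nrhs, proto);
    std::vector<LatticeFermion> sol(Nrhs, proto);

    ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000);
    CG(HermOp, src, sol);  // resolves to the new std::vector overload; the
                           // default implementation solves each RHS in turn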

View File

@@ -55,6 +55,14 @@ namespace Grid {
   template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
   public:
     virtual GridBase *RedBlackGrid(void)=0;
+
+    //////////////////////////////////////////////////////////////////////
+    // Query the even even properties to make algorithmic decisions
+    //////////////////////////////////////////////////////////////////////
+    virtual RealD  Mass(void)        { return 0.0; };
+    virtual int    ConstEE(void)     { return 0; }; // Disable assumptions unless overridden
+    virtual int    isTrivialEE(void) { return 0; }; // by a derived class that knows better
+
     // half checkerboard operaions
     virtual void Meooe    (const Field &in, Field &out)=0;
     virtual void Mooee    (const Field &in, Field &out)=0;
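These hooks let a red-black solver query the even-even block instead of assuming the worst case. A hedged sketch of how a caller might branch on them; the pattern is illustrative only, this compare view adds nothing beyond the virtual methods themselves:

    template<class Field>
    void DescribeEvenEven(CheckerBoardedSparseMatrixBase<Field> &M) {
      if ( M.isTrivialEE() ) {
        // Mee == 1: MooeeInv applications in the Schur source preparation are no-ops
      } else if ( M.ConstEE() ) {
        // Mee is site-independent (e.g. a staggered mass term, value M.Mass()):
        // its factor can be folded into the source normalisation once
      } else {
        // general case: make no assumption about the even-even block
      }
    }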

View File

@@ -33,7 +33,7 @@ directory
 namespace Grid {

-enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
+enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };

 //////////////////////////////////////////////////////////////////////////
 // Block conjugate gradient. Dimension zero should be the block direction
@@ -42,7 +42,6 @@ template <class Field>
 class BlockConjugateGradient : public OperatorFunction<Field> {
  public:
   typedef typename Field::scalar_type scomplex;

   int blockDim ;
@@ -54,21 +53,15 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
   RealD Tolerance;
   Integer MaxIterations;
   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
+  Integer PrintInterval; //GridLogMessages or Iterative

   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
-    : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
+    : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
   {};

 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Thin QR factorisation (google it)
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-void ThinQRfact (Eigen::MatrixXcd &m_rr,
-                 Eigen::MatrixXcd &C,
-                 Eigen::MatrixXcd &Cinv,
-                 Field & Q,
-                 const Field & R)
-{
-  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 //Dimensions
 // R_{ferm x Nblock} = Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock
@@ -85,22 +78,20 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
 // Cdag C = Rdag R ; passes.
 // QdagQ  = 1      ; passes
 ////////////////////////////////////////////////////////////////////////////////////////////////////
+void ThinQRfact (Eigen::MatrixXcd &m_rr,
+                 Eigen::MatrixXcd &C,
+                 Eigen::MatrixXcd &Cinv,
+                 Field & Q,
+                 const Field & R)
+{
+  int Orthog = blockDim; // First dimension is block dim; this is an assumption
   sliceInnerProductMatrix(m_rr,R,R,Orthog);

   // Force manifest hermitian to avoid rounding related
   m_rr = 0.5*(m_rr+m_rr.adjoint());

-#if 0
-  std::cout << " Calling Cholesky ldlt on m_rr "  << m_rr <<std::endl;
-  Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
-  std::cout << " Called Cholesky ldlt on m_rr "  << L_ldlt <<std::endl;
-  auto  D_ldlt = m_rr.ldlt().vectorD();
-  std::cout << " Called Cholesky ldlt on m_rr "  << D_ldlt <<std::endl;
-#endif
-
-  //  std::cout << " Calling Cholesky llt on m_rr "  <<std::endl;
   Eigen::MatrixXcd L    = m_rr.llt().matrixL();
-  //  std::cout << " Called Cholesky llt on m_rr "  << L <<std::endl;
   C    = L.adjoint();
   Cinv = C.inverse();

 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -112,6 +103,25 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
 ////////////////////////////////////////////////////////////////////////////////////////////////////
   sliceMulMatrix(Q,Cinv,R,Orthog);
 }
+// see comments above
+void ThinQRfact (Eigen::MatrixXcd &m_rr,
+                 Eigen::MatrixXcd &C,
+                 Eigen::MatrixXcd &Cinv,
+                 std::vector<Field> & Q,
+                 const std::vector<Field> & R)
+{
+  InnerProductMatrix(m_rr,R,R);
+  m_rr = 0.5*(m_rr+m_rr.adjoint());
+  Eigen::MatrixXcd L = m_rr.llt().matrixL();
+  C    = L.adjoint();
+  Cinv = C.inverse();
+  MulMatrix(Q,Cinv,R);
+}
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Call one of several implementations
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -119,14 +129,20 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
 {
   if ( CGtype == BlockCGrQ ) {
     BlockCGrQsolve(Linop,Src,Psi);
-  } else if (CGtype == BlockCG ) {
-    BlockCGsolve(Linop,Src,Psi);
   } else if (CGtype == CGmultiRHS ) {
     CGmultiRHSsolve(Linop,Src,Psi);
   } else {
     assert(0);
   }
 }
+virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
+{
+  if ( CGtype == BlockCGrQVec ) {
+    BlockCGrQsolveVec(Linop,Src,Psi);
+  } else {
+    assert(0);
+  }
+}

 ////////////////////////////////////////////////////////////////////////////
 // BlockCGrQ implementation:
@@ -139,7 +155,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 {
   int Orthog = blockDim; // First dimension is block dim; this is an assumption
   Nblock = B._grid->_fdimensions[Orthog];
+/* FAKE */
+  Nblock=8;

   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;

   X.checkerboard = B.checkerboard;
@@ -202,15 +219,10 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;

   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
   Linop.HermOp(X, AD);
   tmp = B - AD;
-  //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
-  //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
-  //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
-  //std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
-  //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
   D=Q;

   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
@@ -232,14 +244,12 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
     MatrixTimer.Start();
     Linop.HermOp(D, Z);
     MatrixTimer.Stop();
-    //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;

     //4. M  = [D^dag Z]^{-1}
     sliceInnerTimer.Start();
     sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
     sliceInnerTimer.Stop();
     m_M       = m_DZ.inverse();
-    //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;

     //5. X  = X + D MC
     m_tmp     = m_M * m_C;
@@ -257,6 +267,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
     //7. D  = Q + D S^dag
     m_tmp = m_S.adjoint();
     sliceMaddTimer.Start();
     sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
     sliceMaddTimer.Stop();
@@ -317,152 +328,6 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
   IterationsToComplete = k;
 }
 //////////////////////////////////////////////////////////////////////////
-// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
-//////////////////////////////////////////////////////////////////////////
-void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
-{
-  int Orthog = blockDim; // First dimension is block dim; this is an assumption
-  Nblock = Src._grid->_fdimensions[Orthog];
-
-  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
-
-  Psi.checkerboard = Src.checkerboard;
-  conformable(Psi, Src);
-
-  Field P(Src);
-  Field AP(Src);
-  Field R(Src);
-
-  Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
-  Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
-  Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-  Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-
-  Eigen::MatrixXcd m_alpha  = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-  Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
-
-  // Initial residual computation & set up
-  std::vector<RealD> residuals(Nblock);
-  std::vector<RealD> ssq(Nblock);
-
-  sliceNorm(ssq,Src,Orthog);
-  RealD sssum=0;
-  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
-
-  sliceNorm(residuals,Src,Orthog);
-  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
-
-  sliceNorm(residuals,Psi,Orthog);
-  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
-
-  // Initial search dir is guess
-  Linop.HermOp(Psi, AP);
-
-  /************************************************************************
-   * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
-   ************************************************************************
-   * O'Leary : R = B - A X
-   * O'Leary : P = M R ; preconditioner M = 1
-   * O'Leary : alpha = PAP^{-1} RMR
-   * O'Leary : beta  = RMR^{-1}_old RMR_new
-   * O'Leary : X=X+Palpha
-   * O'Leary : R_new=R_old-AP alpha
-   * O'Leary : P=MR_new+P beta
-   */
-
-  R = Src - AP;
-  P = R;
-  sliceInnerProductMatrix(m_rr,R,R,Orthog);
-
-  GridStopWatch sliceInnerTimer;
-  GridStopWatch sliceMaddTimer;
-  GridStopWatch MatrixTimer;
-  GridStopWatch SolverTimer;
-  SolverTimer.Start();
-
-  int k;
-  for (k = 1; k <= MaxIterations; k++) {
-
-    RealD rrsum=0;
-    for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
-
-    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
-              <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
-
-    MatrixTimer.Start();
-    Linop.HermOp(P, AP);
-    MatrixTimer.Stop();
-
-    // Alpha
-    sliceInnerTimer.Start();
-    sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
-    sliceInnerTimer.Stop();
-    m_pAp_inv = m_pAp.inverse();
-    m_alpha   = m_pAp_inv * m_rr ;
-
-    // Psi, R update
-    sliceMaddTimer.Start();
-    sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi
-    sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
-    sliceMaddTimer.Stop();
-
-    // Beta
-    m_rr_inv = m_rr.inverse();
-    sliceInnerTimer.Start();
-    sliceInnerProductMatrix(m_rr,R,R,Orthog);
-    sliceInnerTimer.Stop();
-    m_beta = m_rr_inv *m_rr;
-
-    // Search update
-    sliceMaddTimer.Start();
-    sliceMaddMatrix(AP,m_beta,P,R,Orthog);
-    sliceMaddTimer.Stop();
-    P= AP;
-
-    /*********************
-     * convergence monitor
-     *********************
-     */
-    RealD max_resid=0;
-    RealD rr;
-    for(int b=0;b<Nblock;b++){
-      rr = real(m_rr(b,b))/ssq[b];
-      if ( rr > max_resid ) max_resid = rr;
-    }
-
-    if ( max_resid < Tolerance*Tolerance ) {
-
-      SolverTimer.Stop();
-
-      std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
-      for(int b=0;b<Nblock;b++){
-        std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
-                  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
-      }
-      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
-
-      Linop.HermOp(Psi, AP);
-      AP = AP-Src;
-      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
-
-      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
-      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
-      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
-      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
-      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
-
-      IterationsToComplete = k;
-      return;
-    }
-  }
-  std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
-
-  if (ErrorOnNoConverge) assert(0);
-  IterationsToComplete = k;
-}
-//////////////////////////////////////////////////////////////////////////
 // multiRHS conjugate gradient. Dimension zero should be the block direction
 // Use this for spread out across nodes
 //////////////////////////////////////////////////////////////////////////
@@ -600,6 +465,233 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
   IterationsToComplete = k;
 }

+void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
+  for(int b=0;b<Nblock;b++){
+  for(int bp=0;bp<Nblock;bp++) {
+    m(b,bp) = innerProduct(X[b],Y[bp]);
+  }}
+}
+void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
+  // Should make this cache friendly with site outermost, parallel_for
+  // Deal with case AP aliases with either Y or X
+  std::vector<Field> tmp(Nblock,X[0]);
+  for(int b=0;b<Nblock;b++){
+    tmp[b]   = Y[b];
+    for(int bp=0;bp<Nblock;bp++) {
+      tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
+    }
+  }
+  for(int b=0;b<Nblock;b++){
+    AP[b] = tmp[b];
+  }
+}
+void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
+  // Should make this cache friendly with site outermost, parallel_for
+  for(int b=0;b<Nblock;b++){
+    AP[b] = zero;
+    for(int bp=0;bp<Nblock;bp++) {
+      AP[b] += (m(bp,b))*X[bp];
+    }
+  }
+}
+double normv(const std::vector<Field> &P){
+  double nn = 0.0;
+  for(int b=0;b<Nblock;b++) {
+    nn+=norm2(P[b]);
+  }
+  return nn;
+}
+
+////////////////////////////////////////////////////////////////////////////
+// BlockCGrQvec implementation:
+//--------------------------
+// X is guess/Solution
+// B is RHS
+// Solve A X_i = B_i    ;        i refers to Nblock index
+////////////////////////////////////////////////////////////////////////////
+void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
+{
+  Nblock = B.size();
+  assert(Nblock == X.size());
+
+  std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
+
+  for(int b=0;b<Nblock;b++){
+    X[b].checkerboard = B[b].checkerboard;
+    conformable(X[b], B[b]);
+    conformable(X[b], X[0]);
+  }
+
+  Field Fake(B[0]);
+
+  std::vector<Field> tmp(Nblock,Fake);
+  std::vector<Field>   Q(Nblock,Fake);
+  std::vector<Field>   D(Nblock,Fake);
+  std::vector<Field>   Z(Nblock,Fake);
+  std::vector<Field>  AD(Nblock,Fake);
+
+  Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+
+  Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+
+  Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+
+  // Initial residual computation & set up
+  std::vector<RealD> residuals(Nblock);
+  std::vector<RealD> ssq(Nblock);
+
+  RealD sssum=0;
+  for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
+  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
+
+  for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  /************************************************************************
+   * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
+   ************************************************************************
+   * Dimensions:
+   *
+   *   X,B==(Nferm x Nblock)
+   *   A==(Nferm x Nferm)
+   *
+   * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
+   *
+   * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
+   * for k:
+   *   Z  = AD
+   *   M  = [D^dag Z]^{-1}
+   *   X  = X + D MC
+   *   QS = Q - ZM
+   *   D  = Q + D S^dag
+   *   C  = S C
+   */
+  ///////////////////////////////////////
+  // Initial block: initial search dir is guess
+  ///////////////////////////////////////
+  std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
+
+  //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
+  for(int b=0;b<Nblock;b++) {
+    Linop.HermOp(X[b], AD[b]);
+    tmp[b] = B[b] - AD[b];
+  }
+
+  ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
+
+  for(int b=0;b<Nblock;b++) D[b]=Q[b];
+
+  std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
+
+  ///////////////////////////////////////
+  // Timers
+  ///////////////////////////////////////
+  GridStopWatch sliceInnerTimer;
+  GridStopWatch sliceMaddTimer;
+  GridStopWatch QRTimer;
+  GridStopWatch MatrixTimer;
+  GridStopWatch SolverTimer;
+  SolverTimer.Start();
+
+  int k;
+  for (k = 1; k <= MaxIterations; k++) {
+
+    //3. Z  = AD
+    MatrixTimer.Start();
+    for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
+    MatrixTimer.Stop();
+
+    //4. M  = [D^dag Z]^{-1}
+    sliceInnerTimer.Start();
+    InnerProductMatrix(m_DZ,D,Z);
+    sliceInnerTimer.Stop();
+    m_M       = m_DZ.inverse();
+
+    //5. X  = X + D MC
+    m_tmp     = m_M * m_C;
+    sliceMaddTimer.Start();
+    MaddMatrix(X,m_tmp, D,X);
+    sliceMaddTimer.Stop();
+
+    //6. QS = Q - ZM
+    sliceMaddTimer.Start();
+    MaddMatrix(tmp,m_M,Z,Q,-1.0);
+    sliceMaddTimer.Stop();
+    QRTimer.Start();
+    ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
+    QRTimer.Stop();
+
+    //7. D  = Q + D S^dag
+    m_tmp = m_S.adjoint();
+    sliceMaddTimer.Start();
+    MaddMatrix(D,m_tmp,D,Q);
+    sliceMaddTimer.Stop();
+
+    //8. C  = S C
+    m_C = m_S*m_C;
+
+    /*********************
+     * convergence monitor
+     *********************
+     */
+    m_rr = m_C.adjoint() * m_C;
+
+    RealD max_resid=0;
+    RealD rrsum=0;
+    RealD rr;
+
+    for(int b=0;b<Nblock;b++) {
+      rrsum+=real(m_rr(b,b));
+      rr = real(m_rr(b,b))/ssq[b];
+      if ( rr > max_resid ) max_resid = rr;
+    }
+
+    std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
+
+    if ( max_resid < Tolerance*Tolerance ) {
+
+      SolverTimer.Stop();
+
+      std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
+
+      for(int b=0;b<Nblock;b++){
+        std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
+      }
+      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
+
+      for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
+      for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
+      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
+
+      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
+      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
+
+      IterationsToComplete = k;
+      return;
+    }
+  }
+  std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
+
+  if (ErrorOnNoConverge) assert(0);
+  IterationsToComplete = k;
+}
+
 };
 }
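The two ThinQRfact overloads in this file implement the same contract, once with the block direction packed inside a single Field and once with an explicit std::vector<Field>: given the block residual R, produce an upper-triangular C and an orthonormal Q with R = QC, via a Cholesky factorisation of the Gram matrix m_rr = R^dag R = C^dag C. A self-contained Eigen sketch of that identity, with plain matrices standing in for lattice fields (dimensions are illustrative):

    #include <Eigen/Dense>
    #include <iostream>

    int main(void) {
      const int Nferm = 1000, Nblock = 4;   // stand-ins for fermion volume / block size
      Eigen::MatrixXcd R = Eigen::MatrixXcd::Random(Nferm, Nblock);

      Eigen::MatrixXcd G = R.adjoint() * R; // Gram matrix, plays the role of m_rr
      G = 0.5 * (G + G.adjoint());          // force manifest hermiticity, as in the patch

      Eigen::MatrixXcd C    = G.llt().matrixL().adjoint(); // upper triangular
      Eigen::MatrixXcd Cinv = C.inverse();
      Eigen::MatrixXcd Q    = R * Cinv;                    // analogue of MulMatrix(Q,Cinv,R)

      // Both norms should be near machine precision: R = QC and Q^dag Q = 1
      std::cout << "|R - QC|      = " << (R - Q * C).norm() << std::endl;
      std::cout << "|Q^dag Q - 1| = "
                << (Q.adjoint() * Q - Eigen::MatrixXcd::Identity(Nblock, Nblock)).norm()
                << std::endl;
      return 0;
    }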

View File

@@ -133,7 +133,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
       LinalgTimer.Stop();

       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
-                << " residual " << cp << " target " << rsq << std::endl;
+                << " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;

       // Stopping condition
       if (cp <= rsq) {
@@ -150,13 +150,13 @@ class ConjugateGradient : public OperatorFunction<Field> {
         std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
         std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;

-        std::cout << GridLogPerformance << "Time breakdown "<<std::endl;
-        std::cout << GridLogPerformance << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
-        std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
-        std::cout << GridLogPerformance << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
-        std::cout << GridLogPerformance << "\tInner      " << InnerTimer.Elapsed() <<std::endl;
-        std::cout << GridLogPerformance << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
-        std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
+        std::cout << GridLogMessage << "Time breakdown "<<std::endl;
+        std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
+        std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
+        std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
+        std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl;
+        std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
+        std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;

         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
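The rewritten iteration log reports the relative residual against the solver's actual target rather than raw squared norms. The identity it relies on, as a short sketch (cp and ssq are the variables of ConjugateGradient.h; the helper name is hypothetical):

    // Equivalent forms of the CG stopping test, with cp = |r_k|^2 and ssq = |src|^2:
    bool converged(double cp, double ssq, double Tolerance) {
      double rsq = Tolerance * Tolerance * ssq;  // target formed at solver entry
      return cp <= rsq;                          // same as std::sqrt(cp/ssq) <= Tolerance
    }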

View File

@@ -86,228 +86,22 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 */
 namespace Grid {

+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Use base class to share code
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+
   ///////////////////////////////////////////////////////////////////////////////////////////////////////
   // Take a matrix and form a Red Black solver calling a Herm solver
   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
   ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Now make the norm reflect extra factor of Mee
-  template<class Field> class SchurRedBlackStaggeredSolve {
-  private:
+  template<class Field> class SchurRedBlackBase {
+  protected:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
     OperatorFunction<Field> & _HermitianRBSolver;
     int CBfactorise;
     bool subGuess;
   public:
-    /////////////////////////////////////////////////////
-    // Wrap the usual normal equations Schur trick
-    /////////////////////////////////////////////////////
-    SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
-      _HermitianRBSolver(HermitianRBSolver)
-    {
-      CBfactorise=0;
-      subtractGuess(initSubGuess);
-    };
-    void subtractGuess(const bool initSubGuess)
-    {
-      subGuess = initSubGuess;
-    }
-    bool isSubtractGuess(void)
-    {
-      return subGuess;
-    }
-
-    template<class Matrix>
-    void operator() (Matrix & _Matrix,const Field &in, Field &out){
-      ZeroGuesser<Field> guess;
-      (*this)(_Matrix,in,out,guess);
-    }
-    template<class Matrix, class Guesser>
-    void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){
-
-      // FIXME CGdiagonalMee not implemented virtual function
-      // FIXME use CBfactorise to control schur decomp
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
-
-      Field src_e(grid);
-      Field src_o(grid);
-      Field sol_e(grid);
-      Field sol_o(grid);
-      Field tmp(grid);
-      Field Mtmp(grid);
-      Field resid(fgrid);
-
-      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
-      pickCheckerboard(Even,src_e,in);
-      pickCheckerboard(Odd ,src_o,in);
-      pickCheckerboard(Even,sol_e,out);
-      pickCheckerboard(Odd ,sol_o,out);
-      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
-
-      /////////////////////////////////////////////////////
-      // src_o = (source_o - Moe MeeInv source_e)
-      /////////////////////////////////////////////////////
-      _Matrix.MooeeInv(src_e,tmp);   assert(  tmp.checkerboard ==Even);
-      _Matrix.Meooe   (tmp,Mtmp);    assert( Mtmp.checkerboard ==Odd);
-      tmp=src_o-Mtmp;                assert(  tmp.checkerboard ==Odd);
-
-      //src_o = tmp;                 assert(src_o.checkerboard ==Odd);
-      _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
-
-      //////////////////////////////////////////////////////////////
-      // Call the red-black solver
-      //////////////////////////////////////////////////////////////
-      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
-      guess(src_o, sol_o);
-      Mtmp = sol_o;
-      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
-      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
-
-      // Fionn A2A boolean behavioural control
-      if (subGuess)  sol_o = sol_o-Mtmp;
-
-      ///////////////////////////////////////////////////
-      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
-      ///////////////////////////////////////////////////
-      _Matrix.Meooe(sol_o,tmp);      assert(  tmp.checkerboard ==Even);
-      src_e = src_e-tmp;             assert( src_e.checkerboard ==Even);
-      _Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
-      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
-
-      setCheckerboard(out,sol_e);    assert( sol_e.checkerboard ==Even);
-      setCheckerboard(out,sol_o);    assert( sol_o.checkerboard ==Odd );
-      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
-
-      // Verify the unprec residual
-      if ( ! subGuess ) {
-        _Matrix.M(out,resid);
-        resid = resid-in;
-        RealD ns = norm2(in);
-        RealD nr = norm2(resid);
-        std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
-      } else {
-        std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
-      }
-    }
-  };
-  template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
-
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Take a matrix and form a Red Black solver calling a Herm solver
-  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class Field> class SchurRedBlackDiagMooeeSolve {
-  private:
-    OperatorFunction<Field> & _HermitianRBSolver;
-    int CBfactorise;
-    bool subGuess;
-  public:
-    /////////////////////////////////////////////////////
-    // Wrap the usual normal equations Schur trick
-    /////////////////////////////////////////////////////
-    SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0, const bool initSubGuess = false)  : _HermitianRBSolver(HermitianRBSolver)
-    {
-      CBfactorise=cb;
-      subtractGuess(initSubGuess);
-    };
-    void subtractGuess(const bool initSubGuess)
-    {
-      subGuess = initSubGuess;
-    }
-    bool isSubtractGuess(void)
-    {
-      return subGuess;
-    }
-
-    template<class Matrix>
-    void operator() (Matrix & _Matrix,const Field &in, Field &out){
-      ZeroGuesser<Field> guess;
-      (*this)(_Matrix,in,out,guess);
-    }
-    template<class Matrix, class Guesser>
-    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
-
-      // FIXME CGdiagonalMee not implemented virtual function
-      // FIXME use CBfactorise to control schur decomp
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
-
-      Field src_e(grid);
-      Field src_o(grid);
-      Field sol_e(grid);
-      Field sol_o(grid);
-      Field tmp(grid);
-      Field Mtmp(grid);
-      Field resid(fgrid);
-
-      pickCheckerboard(Even,src_e,in);
-      pickCheckerboard(Odd ,src_o,in);
-      pickCheckerboard(Even,sol_e,out);
-      pickCheckerboard(Odd ,sol_o,out);
-
-      /////////////////////////////////////////////////////
-      // src_o = Mdag * (source_o - Moe MeeInv source_e)
-      /////////////////////////////////////////////////////
-      _Matrix.MooeeInv(src_e,tmp);   assert(  tmp.checkerboard ==Even);
-      _Matrix.Meooe   (tmp,Mtmp);    assert( Mtmp.checkerboard ==Odd);
-      tmp=src_o-Mtmp;                assert(  tmp.checkerboard ==Odd);
-
-      // get the right MpcDag
-      _HermOpEO.MpcDag(tmp,src_o);   assert(src_o.checkerboard ==Odd);
-
-      //////////////////////////////////////////////////////////////
-      // Call the red-black solver
-      //////////////////////////////////////////////////////////////
-      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
-      guess(src_o,sol_o);
-      Mtmp = sol_o;
-      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
-
-      // Fionn A2A boolean behavioural control
-      if (subGuess)  sol_o = sol_o-Mtmp;
-
-      ///////////////////////////////////////////////////
-      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
-      ///////////////////////////////////////////////////
-      _Matrix.Meooe(sol_o,tmp);      assert(  tmp.checkerboard ==Even);
-      src_e = src_e-tmp;             assert( src_e.checkerboard ==Even);
-      _Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
-
-      setCheckerboard(out,sol_e);    assert( sol_e.checkerboard ==Even);
-      setCheckerboard(out,sol_o);    assert( sol_o.checkerboard ==Odd );
-
-      // Verify the unprec residual
-      if ( ! subGuess ) {
-        _Matrix.M(out,resid);
-        resid = resid-in;
-        RealD ns = norm2(in);
-        RealD nr = norm2(resid);
-        std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
-      } else {
-        std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
-      }
-    }
-  };
-
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Take a matrix and form a Red Black solver calling a Herm solver
-  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class Field> class SchurRedBlackDiagTwoSolve {
-  private:
-    OperatorFunction<Field> & _HermitianRBSolver;
-    int CBfactorise;
-    bool subGuess;
-  public:
-    /////////////////////////////////////////////////////
-    // Wrap the usual normal equations Schur trick
-    /////////////////////////////////////////////////////
-    SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
+    SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
     _HermitianRBSolver(HermitianRBSolver)
     {
       CBfactorise = 0;
@@ -322,12 +116,86 @@ namespace Grid {
       return subGuess;
     }

-    template<class Matrix>
+    /////////////////////////////////////////////////////////////
+    // Shared code
+    /////////////////////////////////////////////////////////////
     void operator() (Matrix & _Matrix,const Field &in, Field &out){
       ZeroGuesser<Field> guess;
       (*this)(_Matrix,in,out,guess);
     }
-    template<class Matrix,class Guesser>
+    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
+    {
+      ZeroGuesser<Field> guess;
+      (*this)(_Matrix,in,out,guess);
+    }
+
+    template<class Guesser>
+    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+      int nblock = in.size();
+
+      std::vector<Field> src_o(nblock,grid);
+      std::vector<Field> sol_o(nblock,grid);
+      std::vector<Field> guess_save;
+
+      Field resid(fgrid);
+      Field tmp(grid);
+
+      ////////////////////////////////////////////////
+      // Prepare RedBlack source
+      ////////////////////////////////////////////////
+      for(int b=0;b<nblock;b++){
+        RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
+      }
+
+      ////////////////////////////////////////////////
+      // Make the guesses
+      ////////////////////////////////////////////////
+      if ( subGuess ) guess_save.resize(nblock,grid);
+      for(int b=0;b<nblock;b++){
+        guess(src_o[b],sol_o[b]);
+        if ( subGuess ) {
+          guess_save[b] = sol_o[b];
+        }
+      }
+
+      //////////////////////////////////////////////////////////////
+      // Call the block solver
+      //////////////////////////////////////////////////////////////
+      std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
+      RedBlackSolve(_Matrix,src_o,sol_o);
+
+      ////////////////////////////////////////////////
+      // A2A boolean behavioural control & reconstruct other checkerboard
+      ////////////////////////////////////////////////
+      for(int b=0;b<nblock;b++) {
+        if (subGuess)   sol_o[b] = sol_o[b] - guess_save[b];
+
+        ///////// Needs even source //////////////
+        pickCheckerboard(Even,tmp,in[b]);
+        RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
+
+        /////////////////////////////////////////////////
+        // Check unprec residual if possible
+        /////////////////////////////////////////////////
+        if ( ! subGuess ) {
+          _Matrix.M(out[b],resid);
+          resid = resid-in[b];
+          RealD ns = norm2(in[b]);
+          RealD nr = norm2(resid);
+
+          std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
+        } else {
+          std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
+        }
+      }
+    }
+
+    template<class Guesser>
     void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){

       // FIXME CGdiagonalMee not implemented virtual function
@@ -335,20 +203,153 @@ namespace Grid {
       GridBase *grid = _Matrix.RedBlackGrid();
       GridBase *fgrid= _Matrix.Grid();

-      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
-      Field src_e(grid);
+      Field resid(fgrid);
       Field src_o(grid);
-      Field sol_e(grid);
+      Field src_e(grid);
       Field sol_o(grid);
+
+      ////////////////////////////////////////////////
+      // RedBlack source
+      ////////////////////////////////////////////////
+      RedBlackSource(_Matrix,in,src_e,src_o);
+
+      ////////////////////////////////
+      // Construct the guess
+      ////////////////////////////////
+      Field tmp(grid);
+      guess(src_o,sol_o);
+
+      Field guess_save(grid);
+      guess_save = sol_o;
+
+      //////////////////////////////////////////////////////////////
+      // Call the red-black solver
+      //////////////////////////////////////////////////////////////
+      RedBlackSolve(_Matrix,src_o,sol_o);
+
+      ////////////////////////////////////////////////
+      // Fionn A2A boolean behavioural control
+      ////////////////////////////////////////////////
+      if (subGuess) sol_o= sol_o-guess_save;
+
+      ///////////////////////////////////////////////////
+      // RedBlack solution needs the even source
+      ///////////////////////////////////////////////////
+      RedBlackSolution(_Matrix,sol_o,src_e,out);
+
+      // Verify the unprec residual
+      if ( ! subGuess ) {
+        _Matrix.M(out,resid);
+        resid = resid-in;
+        RealD ns = norm2(in);
+        RealD nr = norm2(resid);
+
+        std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
+      } else {
+        std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
+      }
+    }
+
+    /////////////////////////////////////////////////////////////
+    // Override in derived. Not virtual as template methods
+    /////////////////////////////////////////////////////////////
+    virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0;
+    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0;
+    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0;
+    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o) =0;
+
+  };
+
+  template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
+  public:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
+
+    SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
+      : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
+    {
+    }
+
+    //////////////////////////////////////////////////////
+    // Override RedBlack specialisation
+    //////////////////////////////////////////////////////
+    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
       Field tmp(grid);
       Field Mtmp(grid);
-      Field resid(fgrid);

-      pickCheckerboard(Even,src_e,in);
-      pickCheckerboard(Odd ,src_o,in);
-      pickCheckerboard(Even,sol_e,out);
-      pickCheckerboard(Odd ,sol_o,out);
+      pickCheckerboard(Even,src_e,src);
+      pickCheckerboard(Odd ,src_o,src);
+
+      /////////////////////////////////////////////////////
+      // src_o = (source_o - Moe MeeInv source_e)
+      /////////////////////////////////////////////////////
+      _Matrix.MooeeInv(src_e,tmp);   assert(  tmp.checkerboard ==Even);
+      _Matrix.Meooe   (tmp,Mtmp);    assert( Mtmp.checkerboard ==Odd);
+      tmp=src_o-Mtmp;                assert(  tmp.checkerboard ==Odd);
+
+      _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
+    }
+
+    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field tmp(grid);
+      Field sol_e(grid);
+      Field src_e(grid);
+
+      src_e = src_e_c; // Const correctness
+
+      ///////////////////////////////////////////////////
+      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+      ///////////////////////////////////////////////////
+      _Matrix.Meooe(sol_o,tmp);      assert(  tmp.checkerboard ==Even);
+      src_e = src_e-tmp;             assert( src_e.checkerboard ==Even);
+      _Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
+
+      setCheckerboard(sol,sol_e);    assert( sol_e.checkerboard ==Even);
+      setCheckerboard(sol,sol_o);    assert( sol_o.checkerboard ==Odd );
+    }
+
+    virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
+    {
+      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
+    };
+
+    virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
+    {
+      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
+    }
+  };
+
+  template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Site diagonal has Mooee on it.
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
+  public:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
+
+    SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
+      : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
+
+    //////////////////////////////////////////////////////
+    // Override RedBlack specialisation
+    //////////////////////////////////////////////////////
+    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field tmp(grid);
+      Field Mtmp(grid);
+
+      pickCheckerboard(Even,src_e,src);
+      pickCheckerboard(Odd ,src_o,src);

       /////////////////////////////////////////////////////
       // src_o = Mdag * (source_o - Moe MeeInv source_e)
@@ -358,99 +359,67 @@ namespace Grid {
       tmp=src_o-Mtmp;              assert(  tmp.checkerboard ==Odd);

       // get the right MpcDag
+      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
       _HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
+    }

-      //////////////////////////////////////////////////////////////
-      // Call the red-black solver
-      //////////////////////////////////////////////////////////////
-      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
-      //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
-      guess(src_o,tmp);
-      Mtmp = tmp;
-      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd);
-      // Fionn A2A boolean behavioural control
-      if (subGuess)  tmp = tmp-Mtmp;
-      _Matrix.MooeeInv(tmp,sol_o);  assert(  sol_o.checkerboard ==Odd);
+    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
+    {
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      Field tmp(grid);
+      Field sol_e(grid);
+      Field src_e_i(grid);

       ///////////////////////////////////////////////////
       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
       ///////////////////////////////////////////////////
-      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard ==Even);
-      src_e = src_e-tmp;               assert( src_e.checkerboard ==Even);
-      _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Even);
+      _Matrix.Meooe(sol_o,tmp);          assert(    tmp.checkerboard ==Even);
+      src_e_i = src_e-tmp;               assert(src_e_i.checkerboard ==Even);
+      _Matrix.MooeeInv(src_e_i,sol_e);   assert(  sol_e.checkerboard ==Even);

-      setCheckerboard(out,sol_e);      assert( sol_e.checkerboard ==Even);
-      setCheckerboard(out,sol_o);      assert( sol_o.checkerboard ==Odd );
+      setCheckerboard(sol,sol_e);        assert(  sol_e.checkerboard ==Even);
+      setCheckerboard(sol,sol_o);        assert(  sol_o.checkerboard ==Odd );

-      // Verify the unprec residual
-      if ( ! subGuess ) {
-        _Matrix.M(out,resid);
-        resid = resid-in;
-        RealD ns = norm2(in);
-        RealD nr = norm2(resid);
-        std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
-      } else {
-        std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
-      }
     }
+    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
+    {
+      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
+    };
+    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
+    {
+      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
+      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
+    }
   };

   ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Take a matrix and form a Red Black solver calling a Herm solver
-  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
+  // Site diagonal is identity, right preconditioned by Mee^inv
+  // ( 1 - Meo Moo^inv Moe Mee^inv  ) phi =( 1 - Meo Moo^inv Moe Mee^inv  ) Mee psi =  = eta  =  eta
+  //=> psi = MeeInv phi
   ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class Field> class SchurRedBlackDiagTwoMixed {
-  private:
-    LinearFunction<Field> & _HermitianRBSolver;
-    int CBfactorise;
-    bool subGuess;
+  template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
   public:
+    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;

     /////////////////////////////////////////////////////
     // Wrap the usual normal equations Schur trick
     /////////////////////////////////////////////////////
-    SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
-      _HermitianRBSolver(HermitianRBSolver)
-    {
-      CBfactorise=0;
-      subtractGuess(initSubGuess);
-    };
-    void subtractGuess(const bool initSubGuess)
-    {
-      subGuess = initSubGuess;
-    }
-    bool isSubtractGuess(void)
-    {
-      return subGuess;
-    }
+    SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
+      : SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};

-    template<class Matrix>
-    void operator() (Matrix & _Matrix,const Field &in, Field &out){
-      ZeroGuesser<Field> guess;
-      (*this)(_Matrix,in,out,guess);
-    }
-    template<class Matrix, class Guesser>
-    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
-      // FIXME CGdiagonalMee not implemented virtual function
-      // FIXME use CBfactorise to control schur decomp
+    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
+    {
       GridBase *grid = _Matrix.RedBlackGrid();
       GridBase *fgrid= _Matrix.Grid();

       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);

-      Field src_e(grid);
-      Field src_o(grid);
-      Field sol_e(grid);
-      Field sol_o(grid);
       Field tmp(grid);
       Field Mtmp(grid);
-      Field resid(fgrid);

-      pickCheckerboard(Even,src_e,in);
-      pickCheckerboard(Odd ,src_o,in);
-      pickCheckerboard(Even,sol_e,out);
-      pickCheckerboard(Odd ,sol_o,out);
+      pickCheckerboard(Even,src_e,src);
+      pickCheckerboard(Odd ,src_o,src);

       /////////////////////////////////////////////////////
       // src_o = Mdag * (source_o - Moe MeeInv source_e)
@ -461,43 +430,44 @@ namespace Grid {
// get the right MpcDag // get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd); _HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
}
-    //////////////////////////////////////////////////////////////
-    // Call the red-black solver
-    //////////////////////////////////////////////////////////////
-    std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
-    // _HermitianRBSolver(_HermOpEO,src_o,sol_o);
-    // _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
-    guess(src_o,tmp);
-    Mtmp = tmp;
-    _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
-    // Fionn A2A boolean behavioural control
-    if (subGuess) tmp = tmp-Mtmp;
-    _Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
+  virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
+  {
+    GridBase *grid = _Matrix.RedBlackGrid();
+    GridBase *fgrid= _Matrix.Grid();
+    assert(sol_o.checkerboard==Odd);
+    Field sol_o_i(grid);
+    Field tmp(grid);
+    Field sol_e(grid);
+    ////////////////////////////////////////////////
+    // MooeeInv due to pecond
+    ////////////////////////////////////////////////
+    _Matrix.MooeeInv(sol_o,tmp);
+    sol_o_i = tmp;
    ///////////////////////////////////////////////////
    // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
    ///////////////////////////////////////////////////
-    _Matrix.Meooe(sol_o,tmp);      assert( tmp.checkerboard ==Even);
-    src_e = src_e-tmp;             assert( src_e.checkerboard ==Even);
-    _Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
-    setCheckerboard(out,sol_e);    assert( sol_e.checkerboard ==Even);
-    setCheckerboard(out,sol_o);    assert( sol_o.checkerboard ==Odd );
+    _Matrix.Meooe(sol_o_i,tmp);    assert( tmp.checkerboard ==Even);
+    tmp = src_e-tmp;               assert( src_e.checkerboard ==Even);
+    _Matrix.MooeeInv(tmp,sol_e);   assert( sol_e.checkerboard ==Even);
+    setCheckerboard(sol,sol_e);    assert( sol_e.checkerboard ==Even);
+    setCheckerboard(sol,sol_o_i);  assert( sol_o_i.checkerboard ==Odd );
// Verify the unprec residual
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout << GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid " << std::sqrt(nr / ns) << " nr " << nr << " ns " << ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
  };
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
};
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
}
};
}
#endif
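
Aside: with this refactoring the solve itself is driven through the operator() inherited from SchurRedBlackBase<Field>; a minimal usage sketch (the CG parameters, the operator Dw and the src/sol fields are illustrative assumptions, not part of this diff):

    ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000);       // any OperatorFunction
    SchurRedBlackDiagTwoSolve<LatticeFermion> SchurSolver(CG);

    SchurSolver(Dw, src, sol); // RedBlackSource -> RedBlackSolve -> RedBlackSolution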

View File

@@ -50,8 +50,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
    assert(0);
  }
Grid_quiesce_nodes();
  // Never clean up as done once.
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
@@ -124,10 +122,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
  // split the communicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // int Nparent = parent._processors ;
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
  int Nparent;
  MPI_Comm_size(parent.communicator,&Nparent);
// std::cout << " Parent size "<<Nparent <<std::endl;
  int childsize=1;
  for(int d=0;d<processors.size();d++) {
@@ -136,8 +132,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
  int Nchild = Nparent/childsize;
  assert (childsize * Nchild == Nparent);
  // std::cout << " child size "<<childsize <<std::endl;
  std::vector<int> ccoor(_ndimension); // coor within subcommunicator
  std::vector<int> scoor(_ndimension); // coor of split within parent
  std::vector<int> ssize(_ndimension); // coor of split within parent

View File

@@ -413,7 +413,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
      assert(((uint64_t)ptr&0x3F)==0);
      close(fd);
      WorldShmCommBufs[r] =ptr;
-      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
+      // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
    }
    _ShmAlloc=1;
    _ShmAllocBytes = bytes;
@@ -455,7 +455,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
      assert(((uint64_t)ptr&0x3F)==0);
      close(fd);
      WorldShmCommBufs[r] =ptr;
-      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
+      // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
    }
    _ShmAlloc=1;
    _ShmAllocBytes = bytes;
@@ -499,7 +499,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif
      void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
-      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
+      // std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
      if ( ptr == (void * )MAP_FAILED ) {
        perror("failed mmap");
        assert(0);

View File

@@ -464,9 +464,11 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
  assert(orthog>=0);
  for(int d=0;d<nh;d++){
    if ( d!=orthog ) {
      assert(lg->_processors[d]  == hg->_processors[d]);
      assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
    }
  }
  // the above should guarantee that the operations are local
  parallel_for(int idx=0;idx<lg->lSites();idx++){
@@ -485,7 +487,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
template<class vobj>
-void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
+void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
  typedef typename vobj::scalar_object sobj;
@@ -499,9 +501,11 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,in
  assert(orthog>=0);
  for(int d=0;d<nh;d++){
    if ( d!=orthog ) {
      assert(lg->_processors[d]  == hg->_processors[d]);
      assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
    }
  }
  // the above should guarantee that the operations are local
  parallel_for(int idx=0;idx<lg->lSites();idx++){
@@ -520,7 +524,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, const Lattice<vobj> & higherDim,in
template<class vobj>
-void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
+void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{
  typedef typename vobj::scalar_object sobj;

View File

@@ -146,9 +146,11 @@ public:
    if ( log.timestamp ) {
      log.StopWatch->Stop();
      GridTime now = log.StopWatch->Elapsed();
      if ( log.timing_mode==1 ) log.StopWatch->Reset();
      log.StopWatch->Start();
-      stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
+      stream << log.evidence()
+             << now << log.background() << " : " ;
    }
    stream << log.colour();
    return stream;

View File

@@ -0,0 +1,3 @@
#include <Grid/GridCore.h>
int Grid::BinaryIO::latticeWriteMaxRetry = -1;

View File

@@ -81,6 +81,7 @@ inline void removeWhitespace(std::string &key)
///////////////////////////////////////////////////////////////////////////////////
class BinaryIO {
 public:
static int latticeWriteMaxRetry;
  /////////////////////////////////////////////////////////////////////////////
  // more byte manipulation helpers
@@ -370,7 +371,7 @@ PARALLEL_CRITICAL
#endif
    } else {
      std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
-                << iodata.size() * sizeof(fobj) << " bytes" << std::endl;
+                << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
      std::ifstream fin;
      fin.open(file, std::ios::binary | std::ios::in);
      if (control & BINARYIO_MASTER_APPEND)
@@ -582,7 +583,9 @@ PARALLEL_CRITICAL
    typedef typename vobj::scalar_object sobj;
    typedef typename vobj::Realified::scalar_type word; word w=0;
    GridBase *grid = Umu._grid;
-    uint64_t lsites = grid->lSites();
+    uint64_t lsites = grid->lSites(), offsetCopy = offset;
int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);
    std::vector<sobj> scalardata(lsites);
    std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@@ -597,9 +600,35 @@ PARALLEL_CRITICAL
    grid->Barrier();
    timer.Stop();
while (attemptsLeft >= 0)
{
grid->Barrier();
      IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
               nersc_csum,scidac_csuma,scidac_csumb);
if (checkWrite)
{
std::vector<fobj> ckiodata(lsites);
uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
uint64_t ckoffset = offsetCopy;
std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
grid->Barrier();
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
cknersc_csum,ckscidac_csuma,ckscidac_csumb);
if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
offset = offsetCopy;
}
else
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
break;
}
}
attemptsLeft--;
}
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl; std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
} }
@ -725,5 +754,6 @@ PARALLEL_CRITICAL
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
} }
}; };
} }
#endif #endif
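
Aside: the retry loop above is controlled by the new static BinaryIO::latticeWriteMaxRetry (defined in the new BinaryIO.cc above; the default -1 disables the read-back test), and Hadrons wires it to the parallelWriteMaxRetry application parameter further down in this compare. A sketch of enabling it by hand:

    BinaryIO::latticeWriteMaxRetry = 2; // read back after write, compare NERSC/SciDAC
                                        // checksums, re-write up to 2 times on mismatch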

View File

@@ -233,7 +233,8 @@ class GridLimeReader : public BinaryIO {
    // std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
    BinarySimpleMunger<sobj,sobj> munge;
    BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
    /////////////////////////////////////////////
    // Insist checksum is next record
    /////////////////////////////////////////////

View File

@@ -49,20 +49,38 @@ inline double usecond(void) {
typedef std::chrono::system_clock          GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
-typedef std::chrono::milliseconds GridMillisecs;
-typedef std::chrono::microseconds GridTime;
-typedef std::chrono::microseconds GridUsecs;
-
-inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
-{
-  stream << time.count()<<" ms";
-  return stream;
-}
-inline std::ostream& operator<< (std::ostream & stream, const std::chrono::microseconds & time)
-{
-  stream << time.count()<<" usec";
-  return stream;
-}
+typedef std::chrono::seconds      GridSecs;
+typedef std::chrono::milliseconds GridMillisecs;
+typedef std::chrono::microseconds GridUsecs;
+typedef std::chrono::microseconds GridTime;
+
+inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
+{
+  stream << time.count()<<" s";
+  return stream;
+}
+inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
+{
+  GridSecs second(1);
+  auto secs       = now/second;
+  auto subseconds = now%second;
+  auto fill       = stream.fill();
+
+  stream << secs << "." << std::setw(3) << std::setfill('0') << subseconds.count() << " s";
+  stream.fill(fill);
+
+  return stream;
+}
+inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
+{
+  GridSecs second(1);
+  auto seconds    = now/second;
+  auto subseconds = now%second;
+  auto fill       = stream.fill();
+
+  stream << seconds << "." << std::setw(6) << std::setfill('0') << subseconds.count() << " s";
+  stream.fill(fill);
+
+  return stream;
+}

class GridStopWatch {
 private:
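
Aside: with these operators a duration now prints as seconds with zero-padded subsecond digits; for example (values illustrative):

    GridUsecs     t1(1234567);
    GridMillisecs t2(42);

    std::cout << t1 << std::endl; // "1.234567 s" (previously "1234567 usec")
    std::cout << t2 << std::endl; // "0.042 s"    (previously "42 ms")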

View File

@@ -44,12 +44,15 @@ namespace QCD {
  struct WilsonImplParams {
    bool overlapCommsCompute;
+    std::vector<Real> twist_n_2pi_L;
    std::vector<Complex> boundary_phases;
    WilsonImplParams() : overlapCommsCompute(false) {
      boundary_phases.resize(Nd, 1.0);
+      twist_n_2pi_L.resize(Nd, 0.0);
    };
-    WilsonImplParams(const std::vector<Complex> phi)
-      : boundary_phases(phi), overlapCommsCompute(false) {}
+    WilsonImplParams(const std::vector<Complex> phi) : boundary_phases(phi), overlapCommsCompute(false) {
+      twist_n_2pi_L.resize(Nd, 0.0);
+    }
  };

  struct StaggeredImplParams {

View File

@@ -64,11 +64,6 @@ namespace Grid {
      virtual RealD M    (const FermionField &in, FermionField &out)=0;
      virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
// Query the even even properties to make algorithmic decisions
virtual int ConstEE(void) { return 1; }; // clover returns zero as EE depends on gauge field
virtual int isTrivialEE(void) { return 0; };
virtual RealD Mass(void) {return 0.0;};
      // half checkerboard operaions
      virtual void Meooe    (const FermionField &in, FermionField &out)=0;
      virtual void MeooeDag (const FermionField &in, FermionField &out)=0;

View File

@@ -240,16 +240,30 @@ namespace QCD {
      GaugeLinkField tmp(GaugeGrid);
      Lattice<iScalar<vInteger> > coor(GaugeGrid);
////////////////////////////////////////////////////
// apply any boundary phase or twists
////////////////////////////////////////////////////
      for (int mu = 0; mu < Nd; mu++) {
////////// boundary phase /////////////
        auto pha = Params.boundary_phases[mu];
        scalar_type phase( real(pha),imag(pha) );

-        int Lmu = GaugeGrid->GlobalDimensions()[mu] - 1;
+        int L   = GaugeGrid->GlobalDimensions()[mu];
+        int Lmu = L - 1;

        LatticeCoordinate(coor, mu);

        U = PeekIndex<LorentzIndex>(Umu, mu);
// apply any twists
RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
if ( theta != 0.0) {
scalar_type twphase(::cos(theta),::sin(theta));
U = twphase*U;
std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
}
        tmp = where(coor == Lmu, phase * U, U);
        PokeIndex<LorentzIndex>(Uds, tmp, mu);
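
Aside: the twist is requested per direction through WilsonImplParams; a sketch (the values and the commented action construction are illustrative assumptions):

    WilsonImplParams implParams;          // phases = 1, twists = 0 by default

    implParams.boundary_phases[3] = -1.;  // antiperiodic in time
    implParams.twist_n_2pi_L[0]   = 0.5;  // theta_x = 0.5 * 2*pi/L_x on the x links
    // pass implParams when constructing the fermion action, e.g.
    // WilsonFermionR Dw(Umu, *FGrid, *FrbGrid, mass, implParams);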

View File

@@ -61,9 +61,9 @@ Group & Hdf5Writer::getGroup(void)
}

// Reader implementation ///////////////////////////////////////////////////////
-Hdf5Reader::Hdf5Reader(const std::string &fileName)
+Hdf5Reader::Hdf5Reader(const std::string &fileName, const bool readOnly)
: fileName_(fileName)
-, file_(fileName.c_str(), H5F_ACC_RDWR)
+, file_(fileName.c_str(), readOnly ? H5F_ACC_RDONLY : H5F_ACC_RDWR)
{
  group_ = file_.openGroup("/");
  readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold",

View File

@@ -54,7 +54,7 @@ namespace Grid
  class Hdf5Reader: public Reader<Hdf5Reader>
  {
  public:
-    Hdf5Reader(const std::string &fileName);
+    Hdf5Reader(const std::string &fileName, const bool readOnly = true);
    virtual ~Hdf5Reader(void) = default;
    bool push(const std::string &s);
    void pop(void);
@@ -124,8 +124,11 @@ namespace Grid
    if (flatx.size() > dataSetThres_)
    {
      H5NS::DataSet           dataSet;
+      H5NS::DSetCreatPropList plist;

-      dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace);
+      plist.setChunk(dim.size(), dim.data());
+      plist.setFletcher32();
+      dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace, plist);
      dataSet.write(flatx.data(), Hdf5Type<Element>::type());
    }
    else
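
Aside: readers now open files read-only by default, while writers that need to create checksummed datasets pass readOnly = false explicitly; a sketch (file name illustrative):

    Hdf5Reader reader("results.h5");          // H5F_ACC_RDONLY (new default)
    Hdf5Reader rwReader("results.h5", false); // H5F_ACC_RDWR, as used by A2AMatrixIo below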

View File

@@ -47,6 +47,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#else
#define PARALLEL_FOR_LOOP
#define PARALLEL_FOR_LOOP_INTERN
#define PARALLEL_FOR_LOOP_REDUCE(op, var)
#define PARALLEL_NESTED_LOOP2
#define PARALLEL_NESTED_LOOP5
#define PARALLEL_REGION
@@ -58,6 +59,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for
#define parallel_for_nest2    PARALLEL_NESTED_LOOP2 for
#define parallel_for_nest5    PARALLEL_NESTED_LOOP5 for
#define parallel_critical PARALLEL_CRITICAL
namespace Grid {

View File

@@ -28,16 +28,31 @@
extern "C" {
#include <openssl/sha.h>
}
#ifdef USE_IPP
#include "ipp.h"
#endif
#pragma once

class GridChecksum
{
public:
-  static inline uint32_t crc32(void *data,size_t bytes)
+  static inline uint32_t crc32(const void *data, size_t bytes)
  {
    return ::crc32(0L,(unsigned char *)data,bytes);
  }
#ifdef USE_IPP
static inline uint32_t crc32c(const void* data, size_t bytes)
{
uint32_t crc32c = ~(uint32_t)0;
ippsCRC32C_8u(reinterpret_cast<const unsigned char *>(data), bytes, &crc32c);
ippsSwapBytes_32u_I(&crc32c, 1);
return ~crc32c;
}
#endif
  template <typename T>
  static inline std::string sha256_string(const std::vector<T> &hash)
  {
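
Aside: a usage sketch for the checksums (buffer contents illustrative); crc32c requires Grid to be configured against Intel IPP:

    std::vector<double> buf(1024, 1.);
    uint32_t c;

    #ifdef USE_IPP
    c = GridChecksum::crc32c(buf.data(), buf.size()*sizeof(double));
    #else
    c = GridChecksum::crc32(buf.data(), buf.size()*sizeof(double));
    #endif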

View File

@@ -32,11 +32,19 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/Global.hpp>
#include <Hadrons/TimerArray.hpp>
#include <Grid/Eigen/unsupported/CXX11/Tensor>
#ifdef USE_MKL
#include "mkl.h"
#include "mkl_cblas.h"
#endif
#ifndef HADRONS_A2AM_NAME
#define HADRONS_A2AM_NAME "a2aMatrix"
#endif
#ifndef HADRONS_A2AM_IO_TYPE
#define HADRONS_A2AM_IO_TYPE ComplexF
#endif
#define HADRONS_A2AM_PARALLEL_IO

BEGIN_HADRONS_NAMESPACE
@@ -51,6 +59,12 @@ BEGIN_HADRONS_NAMESPACE
template <typename T>
using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
template <typename T>
using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
template <typename T>
using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
/******************************************************************************
 *                     Abstract class for A2A kernels                         *
 ******************************************************************************/
@@ -76,10 +90,15 @@ public:
    // constructors
    A2AMatrixIo(void) = default;
    A2AMatrixIo(std::string filename, std::string dataname,
-                const unsigned int nt, const unsigned int ni,
-                const unsigned int nj);
+                const unsigned int nt, const unsigned int ni = 0,
+                const unsigned int nj = 0);
    // destructor
    ~A2AMatrixIo(void) = default;
// access
unsigned int getNi(void) const;
unsigned int getNj(void) const;
unsigned int getNt(void) const;
size_t getSize(void) const;
    // file allocation
    template <typename MetadataType>
    void initFile(const MetadataType &d, const unsigned int chunkSize);
@@ -88,9 +107,11 @@ public:
                   const unsigned int blockSizei, const unsigned int blockSizej);
    void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
                   const unsigned int i, const unsigned int j);
template <template <class> class Vec, typename VecT>
void load(Vec<VecT> &v, double *tRead = nullptr);
private:
-    std::string  filename_, dataname_;
-    unsigned int nt_, ni_, nj_;
+    std::string  filename_{""}, dataname_{""};
+    unsigned int nt_{0}, ni_{0}, nj_{0};
};
/******************************************************************************
@@ -136,6 +157,226 @@ private:
    std::vector<IoHelper> nodeIo_;
};
/******************************************************************************
* A2A matrix contraction kernels *
******************************************************************************/
class A2AContraction
{
public:
// accTrMul(acc, a, b): acc += tr(a*b)
template <typename C, typename MatLeft, typename MatRight>
static inline void accTrMul(C &acc, const MatLeft &a, const MatRight &b)
{
if ((MatLeft::Options == Eigen::RowMajor) and
(MatRight::Options == Eigen::ColMajor))
{
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
{
C tmp;
#ifdef USE_MKL
dotuRow(tmp, r, a, b);
#else
tmp = a.row(r).conjugate().dot(b.col(r));
#endif
parallel_critical
{
acc += tmp;
}
}
}
else
{
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
{
C tmp;
#ifdef USE_MKL
dotuCol(tmp, c, a, b);
#else
tmp = a.col(c).conjugate().dot(b.row(c));
#endif
parallel_critical
{
acc += tmp;
}
}
}
}
template <typename MatLeft, typename MatRight>
static inline double accTrMulFlops(const MatLeft &a, const MatRight &b)
{
double n = a.rows()*a.cols();
return 8.*n;
}
// mul(res, a, b): res = a*b
#ifdef USE_MKL
template <template <class, int...> class Mat, int... Opts>
static inline void mul(Mat<ComplexD, Opts...> &res,
const Mat<ComplexD, Opts...> &a,
const Mat<ComplexD, Opts...> &b)
{
static const ComplexD one(1., 0.), zero(0., 0.);
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
{
res.resize(a.rows(), b.cols());
}
if (Mat<ComplexD, Opts...>::Options == Eigen::RowMajor)
{
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat<ComplexD, Opts...>::Options == Eigen::ColMajor)
{
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
}
template <template <class, int...> class Mat, int... Opts>
static inline void mul(Mat<ComplexF, Opts...> &res,
const Mat<ComplexF, Opts...> &a,
const Mat<ComplexF, Opts...> &b)
{
static const ComplexF one(1., 0.), zero(0., 0.);
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
{
res.resize(a.rows(), b.cols());
}
if (Mat<ComplexF, Opts...>::Options == Eigen::RowMajor)
{
cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat<ComplexF, Opts...>::Options == Eigen::ColMajor)
{
cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
}
#else
template <typename Mat>
static inline void mul(Mat &res, const Mat &a, const Mat &b)
{
res = a*b;
}
#endif
template <typename Mat>
static inline double mulFlops(const Mat &a, const Mat &b)
{
double nr = a.rows(), nc = a.cols();
return nr*nr*(6.*nc + 2.*(nc - 1.));
}
private:
template <typename C, typename MatLeft, typename MatRight>
static inline void makeDotRowPt(C * &aPt, unsigned int &aInc, C * &bPt,
unsigned int &bInc, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aRow*a.cols();
aInc = 1;
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aRow;
aInc = a.rows();
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aRow;
bInc = b.cols();
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aRow*b.rows();
bInc = 1;
}
}
#ifdef USE_MKL
template <typename C, typename MatLeft, typename MatRight>
static inline void makeDotColPt(C * &aPt, unsigned int &aInc, C * &bPt,
unsigned int &bInc, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aCol;
aInc = a.cols();
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aCol*a.rows();
aInc = 1;
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aCol*b.cols();
bInc = 1;
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aCol;
bInc = b.rows();
}
}
template <typename MatLeft, typename MatRight>
static inline void dotuRow(ComplexF &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexF *aPt, *bPt;
unsigned int aInc, bInc;
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
cblas_cdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void dotuCol(ComplexF &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexF *aPt, *bPt;
unsigned int aInc, bInc;
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
cblas_cdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void dotuRow(ComplexD &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void dotuCol(ComplexD &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
#endif
};
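
Aside: a sketch of how these kernels are called from contraction code (matrix sizes illustrative):

    A2AMatrix<ComplexD> a(128, 128), b(128, 128), prod;
    ComplexD            corr = 0.;

    a.setRandom(); b.setRandom();
    A2AContraction::accTrMul(corr, a, b); // corr += tr(a*b), threaded dot products
    A2AContraction::mul(prod, a, b);      // prod = a*b, cblas_*gemm when USE_MKL is defined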
/******************************************************************************
 *                  A2AMatrixIo template implementation                       *
 ******************************************************************************/
@@ -148,6 +389,31 @@ A2AMatrixIo<T>::A2AMatrixIo(std::string filename, std::string dataname,
, nt_(nt), ni_(ni), nj_(nj)
{}
// access //////////////////////////////////////////////////////////////////////
template <typename T>
unsigned int A2AMatrixIo<T>::getNt(void) const
{
return nt_;
}
template <typename T>
unsigned int A2AMatrixIo<T>::getNi(void) const
{
return ni_;
}
template <typename T>
unsigned int A2AMatrixIo<T>::getNj(void) const
{
return nj_;
}
template <typename T>
size_t A2AMatrixIo<T>::getSize(void) const
{
return nt_*ni_*nj_*sizeof(T);
}
// file allocation /////////////////////////////////////////////////////////////
template <typename T>
template <typename MetadataType>
@@ -171,11 +437,12 @@ void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSiz
    }

    // create the dataset
-    Hdf5Reader reader(filename_);
+    Hdf5Reader reader(filename_, false);

    push(reader, dataname_);
    auto &group = reader.getGroup();
    plist.setChunk(chunk.size(), chunk.data());
plist.setFletcher32();
    dataset = group.createDataSet(HADRONS_A2AM_NAME, Hdf5Type<T>::type(), dataspace, plist);
#else
    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
@@ -191,7 +458,7 @@ void A2AMatrixIo<T>::saveBlock(const T *data,
                               const unsigned int blockSizej)
{
#ifdef HAVE_HDF5
-    Hdf5Reader reader(filename_);
+    Hdf5Reader reader(filename_, false);
    std::vector<hsize_t> count = {nt_, blockSizei, blockSizej},
                         offset = {0, static_cast<hsize_t>(i),
                                   static_cast<hsize_t>(j)},
@@ -226,6 +493,80 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
    saveBlock(m.data() + offset, i, j, blockSizei, blockSizej);
}
template <typename T>
template <template <class> class Vec, typename VecT>
void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
{
#ifdef HAVE_HDF5
Hdf5Reader reader(filename_);
std::vector<hsize_t> hdim;
H5NS::DataSet dataset;
H5NS::DataSpace dataspace;
H5NS::CompType datatype;
push(reader, dataname_);
auto &group = reader.getGroup();
dataset = group.openDataSet(HADRONS_A2AM_NAME);
datatype = dataset.getCompType();
dataspace = dataset.getSpace();
hdim.resize(dataspace.getSimpleExtentNdims());
dataspace.getSimpleExtentDims(hdim.data());
if ((nt_*ni_*nj_ != 0) and
((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_)))
{
HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got "
+ std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x"
+ std::to_string(hdim[2]) + ", expected "
+ std::to_string(nt_) + "x" + std::to_string(ni_) + "x"
+ std::to_string(nj_));
}
else if (ni_*nj_ == 0)
{
if (hdim[0] != nt_)
{
HADRONS_ERROR(Size, "all-to-all time size mismatch (got "
+ std::to_string(hdim[0]) + ", expected "
+ std::to_string(nt_) + ")");
}
ni_ = hdim[1];
nj_ = hdim[2];
}
A2AMatrix<T> buf(ni_, nj_);
std::vector<hsize_t> count = {1, static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)},
stride = {1, 1, 1},
block = {1, 1, 1},
memCount = {static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)};
H5NS::DataSpace memspace(memCount.size(), memCount.data());
std::cout << "Loading timeslice";
std::cout.flush();
if (tRead) *tRead = 0.;
for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
{
unsigned int t = tp1 - 1;
std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};
if (t % 10 == 0)
{
std::cout << " " << t;
std::cout.flush();
}
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
if (tRead) *tRead -= usecond();
dataset.read(buf.data(), datatype, memspace, dataspace);
if (tRead) *tRead += usecond();
v[t] = buf.template cast<VecT>();
}
std::cout << std::endl;
#else
HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
#endif
}
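
Aside: a sketch of reading a full time-slice set back with load() (file, dataset and cache names are illustrative assumptions; EigenDiskVector, defined in DiskVector.hpp below, is a single-parameter vector template compatible with this method):

    const unsigned int nt = 64;
    A2AMatrixIo<HADRONS_A2AM_IO_TYPE> io("mesons.h5", "meson", nt); // ni/nj = 0: taken from file
    EigenDiskVector<ComplexD>         mat("./mesonCache", nt, 16);
    double tRead;

    io.load(mat, &tRead);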
/******************************************************************************
 *            A2AMatrixBlockComputation template implementation              *
 ******************************************************************************/

View File

@@ -36,7 +36,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE

/******************************************************************************
- *               Classes to generate V & W all-to-all vectors                *
+ *                Class to generate V & W all-to-all vectors                 *
 ******************************************************************************/
template <typename FImpl>
class A2AVectorsSchurDiagTwo
@@ -70,6 +70,42 @@ private:
    SchurDiagTwoOperator<FMat, FermionField> op_;
};
/******************************************************************************
* Methods for V & W all-to-all vectors I/O *
******************************************************************************/
class A2AVectorsIo
{
public:
struct Record: Serializable
{
GRID_SERIALIZABLE_CLASS_MEMBERS(Record,
unsigned int, index);
Record(void): index(0) {}
};
public:
template <typename Field>
static void write(const std::string fileStem, std::vector<Field> &vec,
const bool multiFile, const int trajectory = -1);
template <typename Field>
static void read(std::vector<Field> &vec, const std::string fileStem,
const bool multiFile, const int trajectory = -1);
private:
static inline std::string vecFilename(const std::string stem, const int traj,
const bool multiFile)
{
std::string t = (traj < 0) ? "" : ("." + std::to_string(traj));
if (multiFile)
{
return stem + t;
}
else
{
return stem + t + ".bin";
}
}
};
/******************************************************************************
 *            A2AVectorsSchurDiagTwo template implementation                  *
 ******************************************************************************/
@@ -217,6 +253,90 @@ void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeW5D(FermionField &wout_4d,
    }
}
/******************************************************************************
* all-to-all vectors I/O template implementation *
******************************************************************************/
template <typename Field>
void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec,
const bool multiFile, const int trajectory)
{
Record record;
GridBase *grid = vec[0]._grid;
ScidacWriter binWriter(grid->IsBoss());
std::string filename = vecFilename(fileStem, trajectory, multiFile);
if (multiFile)
{
std::string fullFilename;
for (unsigned int i = 0; i < vec.size(); ++i)
{
fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
LOG(Message) << "Writing vector " << i << std::endl;
makeFileDir(fullFilename, grid);
binWriter.open(fullFilename);
record.index = i;
binWriter.writeScidacFieldRecord(vec[i], record);
binWriter.close();
}
}
else
{
makeFileDir(filename, grid);
binWriter.open(filename);
for (unsigned int i = 0; i < vec.size(); ++i)
{
LOG(Message) << "Writing vector " << i << std::endl;
record.index = i;
binWriter.writeScidacFieldRecord(vec[i], record);
}
binWriter.close();
}
}
template <typename Field>
void A2AVectorsIo::read(std::vector<Field> &vec, const std::string fileStem,
const bool multiFile, const int trajectory)
{
Record record;
ScidacReader binReader;
std::string filename = vecFilename(fileStem, trajectory, multiFile);
if (multiFile)
{
std::string fullFilename;
for (unsigned int i = 0; i < vec.size(); ++i)
{
fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
LOG(Message) << "Reading vector " << i << std::endl;
binReader.open(fullFilename);
binReader.readScidacFieldRecord(vec[i], record);
binReader.close();
if (record.index != i)
{
HADRONS_ERROR(Io, "vector index mismatch");
}
}
}
else
{
binReader.open(filename);
for (unsigned int i = 0; i < vec.size(); ++i)
{
LOG(Message) << "Reading vector " << i << std::endl;
binReader.readScidacFieldRecord(vec[i], record);
if (record.index != i)
{
HADRONS_ERROR(Io, "vector index mismatch");
}
}
binReader.close();
}
}
END_HADRONS_NAMESPACE

#endif // A2A_Vectors_hpp_
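
Aside: a sketch of the intended round trip (stem, vector count and trajectory number are illustrative; the vector must be sized before reading):

    std::vector<FIMPL::FermionField> w(nVec, FIMPL::FermionField(grid));

    A2AVectorsIo::write("w_vectors", w, false, traj); // single file "w_vectors.<traj>.bin"
    A2AVectorsIo::read (w, "w_vectors", false, traj); // errors out on Record::index mismatch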

View File

@@ -108,6 +108,9 @@ void Application::run(void)
        HADRONS_ERROR(Definition, "run id is empty");
    }
    LOG(Message) << "RUN ID '" << getPar().runId << "'" << std::endl;
BinaryIO::latticeWriteMaxRetry = getPar().parallelWriteMaxRetry;
LOG(Message) << "Attempt(s) for resilient parallel I/O: "
<< BinaryIO::latticeWriteMaxRetry << std::endl;
    vm().setRunId(getPar().runId);
    vm().printContent();
    env().printContent();

View File

@@ -56,7 +56,9 @@ public:
                                        TrajRange,                  trajCounter,
                                        VirtualMachine::GeneticPar, genetic,
                                        std::string,                runId,
-                                        std::string,                graphFile);
+                                        std::string,                graphFile,
+                                        int,                        parallelWriteMaxRetry);
+        GlobalPar(void): parallelWriteMaxRetry{-1} {}
    };
public:
    // constructors

View File

@@ -7,6 +7,7 @@ Source file: Hadrons/DilutedNoise.hpp
Copyright (C) 2015-2018

Author: Antonin Portelli <antonin.portelli@me.com>
Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -76,6 +77,22 @@ private:
    unsigned int nt_;
};
template <typename FImpl>
class FullVolumeSpinColorDiagonalNoise: public DilutedNoise<FImpl>
{
public:
typedef typename FImpl::FermionField FermionField;
public:
// constructor/destructor
FullVolumeSpinColorDiagonalNoise(GridCartesian *g, unsigned int n_src);
virtual ~FullVolumeSpinColorDiagonalNoise(void) = default;
// generate noise
virtual void generateNoise(GridParallelRNG &rng);
private:
unsigned int nSrc_;
};
/******************************************************************************
 *                 DilutedNoise template implementation                       *
 ******************************************************************************/
@@ -186,6 +203,47 @@ void TimeDilutedSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rn
    }
}
/******************************************************************************
* FullVolumeSpinColorDiagonalNoise template implementation *
******************************************************************************/
template <typename FImpl>
FullVolumeSpinColorDiagonalNoise<FImpl>::
FullVolumeSpinColorDiagonalNoise(GridCartesian *g, unsigned int nSrc)
: DilutedNoise<FImpl>(g, nSrc*Ns*FImpl::Dimension), nSrc_(nSrc)
{}
template <typename FImpl>
void FullVolumeSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rng)
{
typedef decltype(peekColour((*this)[0], 0)) SpinField;
auto &noise = *this;
auto g = this->getGrid();
auto nd = g->GlobalDimensions().size();
auto nc = FImpl::Dimension;
Complex shift(1., 1.);
LatticeComplex eta(g);
SpinField etas(g);
unsigned int i = 0;
bernoulli(rng, eta);
eta = (2.*eta - shift)*(1./::sqrt(2.));
for (unsigned int n = 0; n < nSrc_; ++n)
{
for (unsigned int s = 0; s < Ns; ++s)
{
etas = zero;
pokeSpin(etas, eta, s);
for (unsigned int c = 0; c < nc; ++c)
{
noise[i] = zero;
pokeColour(noise[i], etas, c);
i++;
}
}
}
}
END_HADRONS_NAMESPACE

#endif // Hadrons_DilutedNoise_hpp_
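
Aside: a sketch of generating the new full-volume noise (grid and RNG setup assumed; FIMPL is the usual Hadrons fermion-implementation alias):

    FullVolumeSpinColorDiagonalNoise<FIMPL> noise(grid, 5); // 5 hits -> 5*Ns*Nc fields
    noise.generateNoise(rng4d);
    auto &eta0 = noise[0]; // indexed hit-major, then spin, then colour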

View File

@@ -29,6 +29,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define Hadrons_DiskVector_hpp_

#include <Hadrons/Global.hpp>
#include <Hadrons/A2AMatrix.hpp>
#include <deque>
#include <sys/stat.h>
#include <ftw.h>
@@ -59,14 +60,18 @@ public:
        : master_(master), cmaster_(master), i_(i) {}

        // operator=: somebody is trying to store a vector element
-        // write to disk and cache
+        // write to cache and tag as modified
        T &operator=(const T &obj) const
        {
auto &cache = *master_.cachePtr_;
auto &modified = *master_.modifiedPtr_;
auto &index = *master_.indexPtr_;
            DV_DEBUG_MSG(&master_, "writing to " << i_);
            master_.cacheInsert(i_, obj);
-            master_.save(master_.filename(i_), obj);
-            return master_.cachePtr_->at(i_);
+            modified[index.at(i_)] = true;
+
+            return cache[index.at(i_)];
        }
        // implicit cast to const object reference and redirection
@@ -83,6 +88,7 @@ public:
public:
    DiskVectorBase(const std::string dirname, const unsigned int size = 0,
                   const unsigned int cacheSize = 1, const bool clean = true);
DiskVectorBase(DiskVectorBase<T> &&v) = default;
    virtual ~DiskVectorBase(void);
    const T & operator[](const unsigned int i) const;
    RwAccessHelper operator[](const unsigned int i);
@@ -103,7 +109,10 @@ private:
    bool         clean_;
    // using pointers to allow modifications when class is const
    // semantic: const means data unmodified, but cache modification allowed
-    std::unique_ptr<std::map<unsigned int, T>> cachePtr_;
+    std::unique_ptr<std::vector<T>>                        cachePtr_;
std::unique_ptr<std::vector<bool>> modifiedPtr_;
std::unique_ptr<std::map<unsigned int, unsigned int>> indexPtr_;
std::unique_ptr<std::stack<unsigned int>> freePtr_;
    std::unique_ptr<std::deque<unsigned int>>              loadsPtr_;
};
@@ -135,7 +144,7 @@ private:
 *                  Specialisation for Eigen matrices                         *
 ******************************************************************************/
template <typename T>
-using EigenDiskVectorMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
+using EigenDiskVectorMat = A2AMatrix<T>;
template <typename T>
class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>>
@@ -153,23 +162,30 @@ private:
    virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
    {
        std::ifstream f(filename, std::ios::binary);
-        std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
+        uint32_t     crc, check;
        Eigen::Index nRow, nCol;
        size_t       matSize;
-        double       t;
-
-        f.read(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char));
-        f.read(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index));
-        f.read(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index));
+        double       tRead, tHash;
+
+        f.read(reinterpret_cast<char *>(&crc),  sizeof(crc));
+        f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow));
+        f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol));
        obj.resize(nRow, nCol);
        matSize = nRow*nCol*sizeof(T);
-        t = -usecond();
+        tRead = -usecond();
        f.read(reinterpret_cast<char *>(obj.data()), matSize);
-        t += usecond();
-        DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s");
-        auto check = GridChecksum::sha256(obj.data(), matSize);
-        DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(check));
-        if (hash != check)
+        tRead += usecond();
+        tHash = -usecond();
+#ifdef USE_IPP
+        check = GridChecksum::crc32c(obj.data(), matSize);
+#else
+        check = GridChecksum::crc32(obj.data(), matSize);
+#endif
+        tHash += usecond();
+        DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s");
+        DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec
+                     << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
+        if (crc != check)
        {
            HADRONS_ERROR(Io, "checksum failed")
        }
@@ -178,23 +194,30 @@ private:
    virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
    {
        std::ofstream f(filename, std::ios::binary);
-        std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
+        uint32_t     crc;
        Eigen::Index nRow, nCol;
        size_t       matSize;
-        double       t;
+        double       tWrite, tHash;

        nRow    = obj.rows();
        nCol    = obj.cols();
        matSize = nRow*nCol*sizeof(T);
-        hash = GridChecksum::sha256(obj.data(), matSize);
-        DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(hash));
-        f.write(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char));
-        f.write(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index));
-        f.write(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index));
-        t = -usecond();
+        tHash = -usecond();
+#ifdef USE_IPP
+        crc = GridChecksum::crc32c(obj.data(), matSize);
+#else
+        crc = GridChecksum::crc32(obj.data(), matSize);
+#endif
+        tHash += usecond();
+        f.write(reinterpret_cast<char *>(&crc),  sizeof(crc));
+        f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow));
+        f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol));
+        tWrite = -usecond();
        f.write(reinterpret_cast<const char *>(obj.data()), matSize);
-        t += usecond();
-        DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s");
+        tWrite += usecond();
+        DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s");
+        DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec
+                     << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
    }
};
@@ -207,7 +230,10 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
                                  const unsigned int cacheSize,
                                  const bool clean)
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
-, cachePtr_(new std::map<unsigned int, T>())
+, cachePtr_(new std::vector<T>(size))
+, modifiedPtr_(new std::vector<bool>(size, false))
+, indexPtr_(new std::map<unsigned int, unsigned int>())
+, freePtr_(new std::stack<unsigned int>)
, loadsPtr_(new std::deque<unsigned int>())
{
    struct stat s;
@@ -217,6 +243,10 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
        HADRONS_ERROR(Io, "directory '" + dirname + "' already exists")
    }
    mkdir(dirname);
for (unsigned int i = 0; i < cacheSize_; ++i)
{
freePtr_->push(i);
}
}

template <typename T>
@@ -232,6 +262,8 @@
const T & DiskVectorBase<T>::operator[](const unsigned int i) const
{
    auto &cache = *cachePtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
    auto &loads = *loadsPtr_;

    DV_DEBUG_MSG(this, "accessing " << i << " (RO)");
@@ -241,7 +273,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
        HADRONS_ERROR(Size, "index out of range");
    }
    const_cast<double &>(access_)++;
-    if (cache.find(i) == cache.end())
+    if (index.find(i) == index.end())
    {
        // cache miss
        DV_DEBUG_MSG(this, "cache miss");
@@ -268,7 +300,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
    DV_DEBUG_MSG(this, "in cache: " << msg);
#endif

-    return cache.at(i);
+    return cache[index.at(i)];
}

template <typename T>
@@ -307,12 +339,23 @@
void DiskVectorBase<T>::evict(void) const
{
    auto &cache = *cachePtr_;
auto &modified = *modifiedPtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
    auto &loads = *loadsPtr_;

-    if (cache.size() >= cacheSize_)
+    if (index.size() >= cacheSize_)
    {
-        DV_DEBUG_MSG(this, "evicting " << loads.front());
-        cache.erase(loads.front());
+        unsigned int i = loads.front();
+
+        DV_DEBUG_MSG(this, "evicting " << i);
if (modified[index.at(i)])
{
DV_DEBUG_MSG(this, "element " << i << " modified, saving to disk");
save(filename(i), cache[index.at(i)]);
}
freeInd.push(index.at(i));
index.erase(i);
        loads.pop_front();
    }
}
@@ -321,29 +364,43 @@ template <typename T>
void DiskVectorBase<T>::fetch(const unsigned int i) const
{
    auto &cache = *cachePtr_;
auto &modified = *modifiedPtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
    auto &loads = *loadsPtr_;
    struct stat s;

    DV_DEBUG_MSG(this, "loading " << i << " from disk");

    evict();

    if(stat(filename(i).c_str(), &s) != 0)
    {
        HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised");
    }
-    load(cache[i], filename(i));
+    index[i] = freeInd.top();
+    freeInd.pop();
+    load(cache[index.at(i)], filename(i));
    loads.push_back(i);
modified[index.at(i)] = false;
}

template <typename T>
void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
{
    auto &cache = *cachePtr_;
auto &modified = *modifiedPtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
    auto &loads = *loadsPtr_;

    evict();
-    cache[i] = obj;
+    index[i] = freeInd.top();
freeInd.pop();
cache[index.at(i)] = obj;
    loads.push_back(i);
modified[index.at(i)] = false;
#ifdef DV_DEBUG
    std::string msg;
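
Aside: with this change writes become write-behind: a stored element is only flushed to disk (with its CRC32 header) when it is evicted from the cache. A sketch (directory name and sizes illustrative):

    EigenDiskVector<ComplexD>    v("./dvTmp", 1000, 4); // 1000 elements, 4 cache slots
    EigenDiskVectorMat<ComplexD> m(32, 32), w;

    m.setZero();
    v[12] = m;   // cached and tagged modified, not written to disk yet
    w = v[12];   // served straight from the cache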

View File

@@ -166,7 +166,13 @@ std::string Hadrons::dirname(const std::string &s)
void Hadrons::makeFileDir(const std::string filename, GridBase *g)
{
-    if (g->IsBoss())
+    bool doIt = true;
if (g)
{
doIt = g->IsBoss();
}
if (doIt)
    {
        std::string dir = dirname(filename);
        int         status = mkdir(dir);

View File

@@ -32,6 +32,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <set>
#include <stack>
#include <regex>
#include <Grid/Grid.h>
#include <cxxabi.h>
@@ -217,15 +218,15 @@ typedef XmlReader ResultReader;
typedef XmlWriter ResultWriter;
#endif

-#define RESULT_FILE_NAME(name) \
-name + "." + std::to_string(vm().getTrajectory()) + "." + resultFileExt
+#define RESULT_FILE_NAME(name, traj) \
+name + "." + std::to_string(traj) + "." + resultFileExt
// recursive mkdir
#define MAX_PATH_LENGTH 512u
int         mkdir(const std::string dirName);
std::string basename(const std::string &s);
std::string dirname(const std::string &s);
-void        makeFileDir(const std::string filename, GridBase *g);
+void        makeFileDir(const std::string filename, GridBase *g = nullptr);
// default Schur convention
#ifndef HADRONS_DEFAULT_SCHUR
@@ -248,6 +249,20 @@ void makeFileDir(const std::string filename, GridBase *g);
// pretty print time profile
void printTimeProfile(const std::map<std::string, GridTime> &timing, GridTime total);
// token replacement utility
template <typename T>
void tokenReplace(std::string &str, const std::string token,
const T &x, const std::string mark = "@")
{
std::string fullToken = mark + token + mark;
auto pos = str.find(fullToken);
if (pos != std::string::npos)
{
str.replace(pos, fullToken.size(), std::to_string(x));
}
}
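
Aside: a sketch of the token utility in use (values illustrative):

    std::string out = "mesons.@traj@.h5";

    tokenReplace(out, "traj", 1500); // out == "mesons.1500.h5"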
END_HADRONS_NAMESPACE

#include <Hadrons/Exceptions.hpp>


@ -5,17 +5,17 @@ lib_LIBRARIES = libHadrons.a
include modules.inc include modules.inc
libHadrons_a_SOURCES = \ libHadrons_a_SOURCES = \
$(modules_cc) \
Application.cc \ Application.cc \
Environment.cc \ Environment.cc \
Exceptions.cc \ Exceptions.cc \
Global.cc \ Global.cc \
Module.cc \ Module.cc \
TimerArray.cc \ TimerArray.cc \
VirtualMachine.cc VirtualMachine.cc \
$(modules_cc)
libHadrons_adir = $(includedir)/Hadrons libHadrons_adir = $(includedir)/Hadrons
nobase_libHadrons_a_HEADERS = \ nobase_libHadrons_a_HEADERS = \
$(modules_hpp) \
A2AVectors.hpp \ A2AVectors.hpp \
A2AMatrix.hpp \ A2AMatrix.hpp \
Application.hpp \ Application.hpp \
@ -33,4 +33,5 @@ nobase_libHadrons_a_HEADERS = \
ModuleFactory.hpp \ ModuleFactory.hpp \
Solver.hpp \ Solver.hpp \
TimerArray.hpp \ TimerArray.hpp \
VirtualMachine.hpp VirtualMachine.hpp \
$(modules_hpp)


@ -144,7 +144,7 @@ if (env().getGrid()->IsBoss() and !ioStem.empty())\
{\ {\
makeFileDir(ioStem, env().getGrid());\ makeFileDir(ioStem, env().getGrid());\
{\ {\
ResultWriter _writer(RESULT_FILE_NAME(ioStem));\ ResultWriter _writer(RESULT_FILE_NAME(ioStem, vm().getTrajectory()));\
write(_writer, name, result);\ write(_writer, name, result);\
}\ }\
} }


@ -24,14 +24,17 @@
#include <Hadrons/Modules/MSolver/Guesser.hpp> #include <Hadrons/Modules/MSolver/Guesser.hpp>
#include <Hadrons/Modules/MSolver/RBPrecCG.hpp> #include <Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Hadrons/Modules/MSolver/A2AVectors.hpp> #include <Hadrons/Modules/MSolver/A2AVectors.hpp>
#include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
#include <Hadrons/Modules/MGauge/UnitEm.hpp> #include <Hadrons/Modules/MGauge/UnitEm.hpp>
#include <Hadrons/Modules/MGauge/StoutSmearing.hpp> #include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
#include <Hadrons/Modules/MGauge/Unit.hpp> #include <Hadrons/Modules/MGauge/Unit.hpp>
#include <Hadrons/Modules/MGauge/Electrify.hpp>
#include <Hadrons/Modules/MGauge/Random.hpp> #include <Hadrons/Modules/MGauge/Random.hpp>
#include <Hadrons/Modules/MGauge/GaugeFix.hpp> #include <Hadrons/Modules/MGauge/GaugeFix.hpp>
#include <Hadrons/Modules/MGauge/FundtoHirep.hpp> #include <Hadrons/Modules/MGauge/FundtoHirep.hpp>
#include <Hadrons/Modules/MGauge/StochEm.hpp> #include <Hadrons/Modules/MGauge/StochEm.hpp>
#include <Hadrons/Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp> #include <Hadrons/Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp>
#include <Hadrons/Modules/MNoise/FullVolumeSpinColorDiagonal.hpp>
#include <Hadrons/Modules/MUtilities/PrecisionCast.hpp> #include <Hadrons/Modules/MUtilities/PrecisionCast.hpp>
#include <Hadrons/Modules/MUtilities/RandomVectors.hpp> #include <Hadrons/Modules/MUtilities/RandomVectors.hpp>
#include <Hadrons/Modules/MUtilities/TestSeqGamma.hpp> #include <Hadrons/Modules/MUtilities/TestSeqGamma.hpp>
@ -65,6 +68,7 @@
#include <Hadrons/Modules/MScalarSUN/TrKinetic.hpp> #include <Hadrons/Modules/MScalarSUN/TrKinetic.hpp>
#include <Hadrons/Modules/MIO/LoadEigenPack.hpp> #include <Hadrons/Modules/MIO/LoadEigenPack.hpp>
#include <Hadrons/Modules/MIO/LoadNersc.hpp> #include <Hadrons/Modules/MIO/LoadNersc.hpp>
#include <Hadrons/Modules/MIO/LoadA2AVectors.hpp>
#include <Hadrons/Modules/MIO/LoadCosmHol.hpp> #include <Hadrons/Modules/MIO/LoadCosmHol.hpp>
#include <Hadrons/Modules/MIO/LoadCoarseEigenPack.hpp> #include <Hadrons/Modules/MIO/LoadCoarseEigenPack.hpp>
#include <Hadrons/Modules/MIO/LoadBinary.hpp> #include <Hadrons/Modules/MIO/LoadBinary.hpp>


@ -32,4 +32,6 @@ using namespace Hadrons;
using namespace MAction; using namespace MAction;
template class Grid::Hadrons::MAction::TDWF<FIMPL>; template class Grid::Hadrons::MAction::TDWF<FIMPL>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MAction::TDWF<FIMPLF>; template class Grid::Hadrons::MAction::TDWF<FIMPLF>;
#endif


@ -49,7 +49,8 @@ public:
unsigned int, Ls, unsigned int, Ls,
double , mass, double , mass,
double , M5, double , M5,
std::string , boundary); std::string , boundary,
std::string , twist);
}; };
template <typename FImpl> template <typename FImpl>
@ -73,7 +74,9 @@ protected:
}; };
MODULE_REGISTER_TMP(DWF, TDWF<FIMPL>, MAction); MODULE_REGISTER_TMP(DWF, TDWF<FIMPL>, MAction);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(DWFF, TDWF<FIMPLF>, MAction); MODULE_REGISTER_TMP(DWFF, TDWF<FIMPLF>, MAction);
#endif
/****************************************************************************** /******************************************************************************
* DWF template implementation * * DWF template implementation *
@ -117,8 +120,9 @@ void TDWF<FImpl>::setup(void)
auto &grb4 = *envGetRbGrid(FermionField); auto &grb4 = *envGetRbGrid(FermionField);
auto &g5 = *envGetGrid(FermionField, par().Ls); auto &g5 = *envGetGrid(FermionField, par().Ls);
auto &grb5 = *envGetRbGrid(FermionField, par().Ls); auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary); typename DomainWallFermion<FImpl>::ImplParams implParams;
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); implParams.boundary_phases = strToVec<Complex>(par().boundary);
implParams.twist_n_2pi_L = strToVec<Real>(par().twist);
envCreateDerived(FMat, DomainWallFermion<FImpl>, getName(), par().Ls, U, g5, envCreateDerived(FMat, DomainWallFermion<FImpl>, getName(), par().Ls, U, g5,
grb5, g4, grb4, par().mass, par().M5, implParams); grb5, g4, grb4, par().mass, par().M5, implParams);
} }
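Both new strings are parsed with strToVec, one entry per direction; the twist is given in units of 2*pi/L, as the twist_n_2pi_L member name suggests. An illustrative fragment with invented values, for a hypothetical DWFPar instance par:

par.boundary = "1 1 1 -1";  // antiperiodic boundary in time
par.twist    = "0.5 0 0 0"; // twist of 0.5 * 2*pi/L in the x direction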


@ -32,4 +32,6 @@ using namespace Hadrons;
using namespace MAction; using namespace MAction;
template class Grid::Hadrons::MAction::TMobiusDWF<FIMPL>; template class Grid::Hadrons::MAction::TMobiusDWF<FIMPL>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MAction::TMobiusDWF<FIMPLF>; template class Grid::Hadrons::MAction::TMobiusDWF<FIMPLF>;
#endif


@ -49,7 +49,8 @@ public:
double , M5, double , M5,
double , b, double , b,
double , c, double , c,
std::string , boundary); std::string , boundary,
std::string , twist);
}; };
template <typename FImpl> template <typename FImpl>
@ -72,7 +73,9 @@ public:
}; };
MODULE_REGISTER_TMP(MobiusDWF, TMobiusDWF<FIMPL>, MAction); MODULE_REGISTER_TMP(MobiusDWF, TMobiusDWF<FIMPL>, MAction);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(MobiusDWFF, TMobiusDWF<FIMPLF>, MAction); MODULE_REGISTER_TMP(MobiusDWFF, TMobiusDWF<FIMPLF>, MAction);
#endif
/****************************************************************************** /******************************************************************************
* TMobiusDWF implementation * * TMobiusDWF implementation *
@ -117,8 +120,9 @@ void TMobiusDWF<FImpl>::setup(void)
auto &grb4 = *envGetRbGrid(FermionField); auto &grb4 = *envGetRbGrid(FermionField);
auto &g5 = *envGetGrid(FermionField, par().Ls); auto &g5 = *envGetGrid(FermionField, par().Ls);
auto &grb5 = *envGetRbGrid(FermionField, par().Ls); auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary); typename MobiusFermion<FImpl>::ImplParams implParams;
typename MobiusFermion<FImpl>::ImplParams implParams(boundary); implParams.boundary_phases = strToVec<Complex>(par().boundary);
implParams.twist_n_2pi_L = strToVec<Real>(par().twist);
envCreateDerived(FMat, MobiusFermion<FImpl>, getName(), par().Ls, U, g5, envCreateDerived(FMat, MobiusFermion<FImpl>, getName(), par().Ls, U, g5,
grb5, g4, grb4, par().mass, par().M5, par().b, par().c, grb5, g4, grb4, par().mass, par().M5, par().b, par().c,
implParams); implParams);


@ -32,4 +32,6 @@ using namespace Hadrons;
using namespace MAction; using namespace MAction;
template class Grid::Hadrons::MAction::TScaledDWF<FIMPL>; template class Grid::Hadrons::MAction::TScaledDWF<FIMPL>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MAction::TScaledDWF<FIMPLF>; template class Grid::Hadrons::MAction::TScaledDWF<FIMPLF>;
#endif


@ -48,7 +48,8 @@ public:
double , mass, double , mass,
double , M5, double , M5,
double , scale, double , scale,
std::string , boundary); std::string , boundary,
std::string , twist);
}; };
template <typename FImpl> template <typename FImpl>
@ -71,7 +72,9 @@ public:
}; };
MODULE_REGISTER_TMP(ScaledDWF, TScaledDWF<FIMPL>, MAction); MODULE_REGISTER_TMP(ScaledDWF, TScaledDWF<FIMPL>, MAction);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(ScaledDWFF, TScaledDWF<FIMPLF>, MAction); MODULE_REGISTER_TMP(ScaledDWFF, TScaledDWF<FIMPLF>, MAction);
#endif
/****************************************************************************** /******************************************************************************
* TScaledDWF implementation * * TScaledDWF implementation *
@ -116,8 +119,9 @@ void TScaledDWF<FImpl>::setup(void)
auto &grb4 = *envGetRbGrid(FermionField); auto &grb4 = *envGetRbGrid(FermionField);
auto &g5 = *envGetGrid(FermionField, par().Ls); auto &g5 = *envGetGrid(FermionField, par().Ls);
auto &grb5 = *envGetRbGrid(FermionField, par().Ls); auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary); typename ScaledShamirFermion<FImpl>::ImplParams implParams;
typename MobiusFermion<FImpl>::ImplParams implParams(boundary); implParams.boundary_phases = strToVec<Complex>(par().boundary);
implParams.twist_n_2pi_L = strToVec<Real>(par().twist);
envCreateDerived(FMat, ScaledShamirFermion<FImpl>, getName(), par().Ls, U, g5, envCreateDerived(FMat, ScaledShamirFermion<FImpl>, getName(), par().Ls, U, g5,
grb5, g4, grb4, par().mass, par().M5, par().scale, grb5, g4, grb4, par().mass, par().M5, par().scale,
implParams); implParams);


@ -32,4 +32,6 @@ using namespace Hadrons;
using namespace MAction; using namespace MAction;
template class Grid::Hadrons::MAction::TWilson<FIMPL>; template class Grid::Hadrons::MAction::TWilson<FIMPL>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MAction::TWilson<FIMPLF>; template class Grid::Hadrons::MAction::TWilson<FIMPLF>;
#endif


@ -47,7 +47,9 @@ public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
std::string, gauge, std::string, gauge,
double , mass, double , mass,
std::string, boundary); std::string, boundary,
std::string, twist);
}; };
template <typename FImpl> template <typename FImpl>
@ -71,7 +73,9 @@ protected:
}; };
MODULE_REGISTER_TMP(Wilson, TWilson<FIMPL>, MAction); MODULE_REGISTER_TMP(Wilson, TWilson<FIMPL>, MAction);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(WilsonF, TWilson<FIMPLF>, MAction); MODULE_REGISTER_TMP(WilsonF, TWilson<FIMPLF>, MAction);
#endif
/****************************************************************************** /******************************************************************************
* TWilson template implementation * * TWilson template implementation *
@ -111,8 +115,9 @@ void TWilson<FImpl>::setup(void)
auto &U = envGet(GaugeField, par().gauge); auto &U = envGet(GaugeField, par().gauge);
auto &grid = *envGetGrid(FermionField); auto &grid = *envGetGrid(FermionField);
auto &gridRb = *envGetRbGrid(FermionField); auto &gridRb = *envGetRbGrid(FermionField);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary); typename WilsonFermion<FImpl>::ImplParams implParams;
typename WilsonFermion<FImpl>::ImplParams implParams(boundary); implParams.boundary_phases = strToVec<Complex>(par().boundary);
implParams.twist_n_2pi_L = strToVec<Real>(par().twist);
envCreateDerived(FMat, WilsonFermion<FImpl>, getName(), 1, U, grid, gridRb, envCreateDerived(FMat, WilsonFermion<FImpl>, getName(), 1, U, grid, gridRb,
par().mass, implParams); par().mass, implParams);
} }


@ -32,4 +32,6 @@ using namespace Hadrons;
using namespace MAction; using namespace MAction;
template class Grid::Hadrons::MAction::TWilsonClover<FIMPL>; template class Grid::Hadrons::MAction::TWilsonClover<FIMPL>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MAction::TWilsonClover<FIMPLF>; template class Grid::Hadrons::MAction::TWilsonClover<FIMPLF>;
#endif


@ -51,7 +51,8 @@ public:
double , csw_r, double , csw_r,
double , csw_t, double , csw_t,
WilsonAnisotropyCoefficients ,clover_anisotropy, WilsonAnisotropyCoefficients ,clover_anisotropy,
std::string, boundary std::string, boundary,
std::string, twist
); );
}; };
@ -75,7 +76,9 @@ public:
}; };
MODULE_REGISTER_TMP(WilsonClover, TWilsonClover<FIMPL>, MAction); MODULE_REGISTER_TMP(WilsonClover, TWilsonClover<FIMPL>, MAction);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(WilsonCloverF, TWilsonClover<FIMPLF>, MAction); MODULE_REGISTER_TMP(WilsonCloverF, TWilsonClover<FIMPLF>, MAction);
#endif
/****************************************************************************** /******************************************************************************
* TWilsonClover template implementation * * TWilsonClover template implementation *
@ -117,8 +120,9 @@ void TWilsonClover<FImpl>::setup(void)
auto &U = envGet(GaugeField, par().gauge); auto &U = envGet(GaugeField, par().gauge);
auto &grid = *envGetGrid(FermionField); auto &grid = *envGetGrid(FermionField);
auto &gridRb = *envGetRbGrid(FermionField); auto &gridRb = *envGetRbGrid(FermionField);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary); typename WilsonCloverFermion<FImpl>::ImplParams implParams;
typename WilsonCloverFermion<FImpl>::ImplParams implParams(boundary); implParams.boundary_phases = strToVec<Complex>(par().boundary);
implParams.twist_n_2pi_L = strToVec<Real>(par().twist);
envCreateDerived(FMat, WilsonCloverFermion<FImpl>, getName(), 1, U, grid, envCreateDerived(FMat, WilsonCloverFermion<FImpl>, getName(), 1, U, grid,
gridRb, par().mass, par().csw_r, par().csw_t, gridRb, par().mass, par().csw_r, par().csw_t,
par().clover_anisotropy, implParams); par().clover_anisotropy, implParams);


@ -32,4 +32,6 @@ using namespace Hadrons;
using namespace MAction; using namespace MAction;
template class Grid::Hadrons::MAction::TZMobiusDWF<ZFIMPL>; template class Grid::Hadrons::MAction::TZMobiusDWF<ZFIMPL>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MAction::TZMobiusDWF<ZFIMPLF>; template class Grid::Hadrons::MAction::TZMobiusDWF<ZFIMPLF>;
#endif


@ -50,7 +50,8 @@ public:
double , b, double , b,
double , c, double , c,
std::vector<std::complex<double>>, omega, std::vector<std::complex<double>>, omega,
std::string , boundary); std::string , boundary,
std::string , twist);
}; };
template <typename FImpl> template <typename FImpl>
@ -73,7 +74,9 @@ public:
}; };
MODULE_REGISTER_TMP(ZMobiusDWF, TZMobiusDWF<ZFIMPL>, MAction); MODULE_REGISTER_TMP(ZMobiusDWF, TZMobiusDWF<ZFIMPL>, MAction);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(ZMobiusDWFF, TZMobiusDWF<ZFIMPLF>, MAction); MODULE_REGISTER_TMP(ZMobiusDWFF, TZMobiusDWF<ZFIMPLF>, MAction);
#endif
/****************************************************************************** /******************************************************************************
* TZMobiusDWF implementation * * TZMobiusDWF implementation *
@ -125,8 +128,9 @@ void TZMobiusDWF<FImpl>::setup(void)
auto &g5 = *envGetGrid(FermionField, par().Ls); auto &g5 = *envGetGrid(FermionField, par().Ls);
auto &grb5 = *envGetRbGrid(FermionField, par().Ls); auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
auto omega = par().omega; auto omega = par().omega;
std::vector<Complex> boundary = strToVec<Complex>(par().boundary); typename ZMobiusFermion<FImpl>::ImplParams implParams;
typename ZMobiusFermion<FImpl>::ImplParams implParams(boundary); implParams.boundary_phases = strToVec<Complex>(par().boundary);
implParams.twist_n_2pi_L = strToVec<Real>(par().twist);
envCreateDerived(FMat, ZMobiusFermion<FImpl>, getName(), par().Ls, U, g5, envCreateDerived(FMat, ZMobiusFermion<FImpl>, getName(), par().Ls, U, g5,
grb5, g4, grb4, par().mass, par().M5, omega, grb5, g4, grb4, par().mass, par().M5, omega,
par().b, par().c, implParams); par().b, par().c, implParams);


@ -33,10 +33,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/ModuleFactory.hpp> #include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/A2AMatrix.hpp> #include <Hadrons/A2AMatrix.hpp>
#ifndef ASF_IO_TYPE
#define ASF_IO_TYPE ComplexF
#endif
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
/****************************************************************************** /******************************************************************************
@ -113,7 +109,7 @@ public:
typedef A2AMatrixBlockComputation<Complex, typedef A2AMatrixBlockComputation<Complex,
FermionField, FermionField,
A2AAslashFieldMetadata, A2AAslashFieldMetadata,
ASF_IO_TYPE> Computation; HADRONS_A2AM_IO_TYPE> Computation;
typedef AslashFieldKernel<Complex, FImpl> Kernel; typedef AslashFieldKernel<Complex, FImpl> Kernel;
public: public:
// constructor // constructor
@ -196,7 +192,7 @@ void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
LOG(Message) << " " << name << std::endl; LOG(Message) << " " << name << std::endl;
} }
LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(ASF_IO_TYPE)) << " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE))
<< "/EM field)" << std::endl; << "/EM field)" << std::endl;
// preparing "B" complexified fields // preparing "B" complexified fields


@ -35,10 +35,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/ModuleFactory.hpp> #include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/A2AMatrix.hpp> #include <Hadrons/A2AMatrix.hpp>
#ifndef MF_IO_TYPE
#define MF_IO_TYPE ComplexF
#endif
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
/****************************************************************************** /******************************************************************************
@ -118,7 +114,7 @@ public:
typedef A2AMatrixBlockComputation<Complex, typedef A2AMatrixBlockComputation<Complex,
FermionField, FermionField,
A2AMesonFieldMetadata, A2AMesonFieldMetadata,
MF_IO_TYPE> Computation; HADRONS_A2AM_IO_TYPE> Computation;
typedef MesonFieldKernel<Complex, FImpl> Kernel; typedef MesonFieldKernel<Complex, FImpl> Kernel;
public: public:
// constructor // constructor
@ -248,7 +244,7 @@ void TA2AMesonField<FImpl>::execute(void)
LOG(Message) << " " << g << std::endl; LOG(Message) << " " << g << std::endl;
} }
LOG(Message) << "Meson field size: " << nt << "*" << N_i << "*" << N_j LOG(Message) << "Meson field size: " << nt << "*" << N_i << "*" << N_j
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(MF_IO_TYPE)) << " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE))
<< "/momentum/bilinear)" << std::endl; << "/momentum/bilinear)" << std::endl;
auto &ph = envGet(std::vector<ComplexField>, momphName_); auto &ph = envGet(std::vector<ComplexField>, momphName_);


@ -0,0 +1,34 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MGauge/Electrify.cc
Copyright (C) 2015-2018
Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MGauge/Electrify.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MGauge;
template class Grid::Hadrons::MGauge::TElectrify<GIMPL>;


@ -0,0 +1,151 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MGauge/Electrify.hpp
Copyright (C) 2015-2018
Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MGauge_Electrify_hpp_
#define Hadrons_MGauge_Electrify_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Electrify gauge *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)
/****************************************************************************
* Electrify a gauge field:
*
* Ue_mu(x) = U_mu(x)*exp(ieqA_mu(x))
*
* with
*
* - gauge: U_mu(x): gauge field
* - emField: A_mu(x): electromagnetic photon field
* - e: value for the elementary charge
* - q: charge in units of e
*
*****************************************************************************/
class ElectrifyPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ElectrifyPar,
std::string, gauge,
std::string, emField,
double, e,
double, charge);
};
template <typename GImpl>
class TElectrify: public Module<ElectrifyPar>
{
public:
GAUGE_TYPE_ALIASES(GImpl,);
public:
typedef PhotonR::GaugeField EmField;
public:
// constructor
TElectrify(const std::string name);
// destructor
virtual ~TElectrify(void) {};
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(Electrify, TElectrify<GIMPL>, MGauge);
/******************************************************************************
* TElectrify implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename GImpl>
TElectrify<GImpl>::TElectrify(const std::string name)
: Module<ElectrifyPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename GImpl>
std::vector<std::string> TElectrify<GImpl>::getInput(void)
{
std::vector<std::string> in = {par().gauge, par().emField};
return in;
}
template <typename GImpl>
std::vector<std::string> TElectrify<GImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename GImpl>
void TElectrify<GImpl>::setup(void)
{
envCreateLat(GaugeField, getName());
envTmpLat(LatticeComplex, "eiAmu");
}
// execution ///////////////////////////////////////////////////////////////////
template <typename GImpl>
void TElectrify<GImpl>::execute(void)
{
LOG(Message) << "Electrify the gauge field " << par().gauge << " using the photon field "
<< par().emField << " with charge e*q= " << par().e << "*" << par().charge << std::endl;
auto &Ue = envGet(GaugeField, getName());
auto &U = envGet(GaugeField, par().gauge);
auto &A = envGet(EmField, par().emField);
envGetTmp(LatticeComplex, eiAmu);
Complex i(0.0,1.0);
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
eiAmu = exp(i * (Real)(par().e * par().charge) * PeekIndex<LorentzIndex>(A, mu));
PokeIndex<LorentzIndex>(Ue, PeekIndex<LorentzIndex>(U, mu) * eiAmu, mu);
}
}
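A hypothetical application fragment wiring this module, assuming a Hadrons Application object named application; module names and charge values are purely illustrative:

MGauge::ElectrifyPar elPar;

elPar.gauge   = "gauge";    // an existing gauge field
elPar.emField = "photon";   // e.g. generated by MGauge::StochEm
elPar.e       = 0.30282212; // ~ sqrt(4*pi*alpha_em)
elPar.charge  = -1./3.;     // a down-type quark charge in units of e
application.createModule<MGauge::Electrify>("Ue", elPar);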
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MGauge_Electrify_hpp_


@ -0,0 +1,34 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MIO/LoadA2AVectors.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MIO/LoadA2AVectors.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MIO;
template class Grid::Hadrons::MIO::TLoadA2AVectors<FIMPL>;


@ -0,0 +1,120 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MIO/LoadA2AVectors.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MIO_LoadA2AVectors_hpp_
#define Hadrons_MIO_LoadA2AVectors_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/A2AVectors.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Module to load all-to-all vectors *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MIO)
class LoadA2AVectorsPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LoadA2AVectorsPar,
std::string, filestem,
bool, multiFile,
unsigned int, size);
};
template <typename FImpl>
class TLoadA2AVectors: public Module<LoadA2AVectorsPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TLoadA2AVectors(const std::string name);
// destructor
virtual ~TLoadA2AVectors(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(LoadA2AVectors, TLoadA2AVectors<FIMPL>, MIO);
/******************************************************************************
* TLoadA2AVectors implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TLoadA2AVectors<FImpl>::TLoadA2AVectors(const std::string name)
: Module<LoadA2AVectorsPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TLoadA2AVectors<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TLoadA2AVectors<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TLoadA2AVectors<FImpl>::setup(void)
{
envCreate(std::vector<FermionField>, getName(), 1, par().size,
envGetGrid(FermionField));
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TLoadA2AVectors<FImpl>::execute(void)
{
auto &vec = envGet(std::vector<FermionField>, getName());
A2AVectorsIo::read(vec, par().filestem, par().multiFile, vm().getTrajectory());
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MIO_LoadA2AVectors_hpp_


@ -32,4 +32,6 @@ using namespace Hadrons;
using namespace MIO; using namespace MIO;
template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL>>; template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL>>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>; template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>;
#endif


@ -72,7 +72,9 @@ public:
}; };
MODULE_REGISTER_TMP(LoadFermionEigenPack, TLoadEigenPack<FermionEigenPack<FIMPL>>, MIO); MODULE_REGISTER_TMP(LoadFermionEigenPack, TLoadEigenPack<FermionEigenPack<FIMPL>>, MIO);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(LoadFermionEigenPackIo32, ARG(TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>), MIO); MODULE_REGISTER_TMP(LoadFermionEigenPackIo32, ARG(TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>), MIO);
#endif
/****************************************************************************** /******************************************************************************
* TLoadEigenPack implementation * * TLoadEigenPack implementation *


@ -6,6 +6,7 @@ Source file: Hadrons/Modules/MNPR/Amputate.cc
Copyright (C) 2015-2018 Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify


@ -6,6 +6,7 @@ Source file: Hadrons/Modules/MNPR/Bilinear.cc
Copyright (C) 2015-2018 Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify


@ -6,6 +6,7 @@ Source file: Hadrons/Modules/MNPR/FourQuark.cc
Copyright (C) 2015-2018 Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify


@ -0,0 +1,36 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNoise/FullVolumeSpinColorDiagonal.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MNoise/FullVolumeSpinColorDiagonal.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MNoise;
template class Grid::Hadrons::MNoise::TFullVolumeSpinColorDiagonal<FIMPL>;
template class Grid::Hadrons::MNoise::TFullVolumeSpinColorDiagonal<ZFIMPL>;


@ -0,0 +1,121 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MNoise/FullVolumeSpinColorDiagonal.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MNoise_FullVolumeSpinColorDiagonal_hpp_
#define Hadrons_MNoise_FullVolumeSpinColorDiagonal_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/DilutedNoise.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Generate full volume spin-color diagonal noise *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MNoise)
class FullVolumeSpinColorDiagonalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(FullVolumeSpinColorDiagonalPar,
unsigned int, nsrc);
};
template <typename FImpl>
class TFullVolumeSpinColorDiagonal: public Module<FullVolumeSpinColorDiagonalPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TFullVolumeSpinColorDiagonal(const std::string name);
// destructor
virtual ~TFullVolumeSpinColorDiagonal(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_TMP(FullVolumeSpinColorDiagonal, TFullVolumeSpinColorDiagonal<FIMPL>, MNoise);
MODULE_REGISTER_TMP(ZFullVolumeSpinColorDiagonal, TFullVolumeSpinColorDiagonal<ZFIMPL>, MNoise);
/******************************************************************************
* TFullVolumeSpinColorDiagonal implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TFullVolumeSpinColorDiagonal<FImpl>::TFullVolumeSpinColorDiagonal(const std::string name)
: Module<FullVolumeSpinColorDiagonalPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TFullVolumeSpinColorDiagonal<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TFullVolumeSpinColorDiagonal<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TFullVolumeSpinColorDiagonal<FImpl>::setup(void)
{
envCreateDerived(DilutedNoise<FImpl>,
FullVolumeSpinColorDiagonalNoise<FImpl>,
getName(), 1, envGetGrid(FermionField), par().nsrc);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TFullVolumeSpinColorDiagonal<FImpl>::execute(void)
{
auto &noise = envGet(DilutedNoise<FImpl>, getName());
LOG(Message) << "Generating full volume, spin-color diagonal noise" << std::endl;
noise.generateNoise(rng4d());
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MNoise_FullVolumeSpinColorDiagonal_hpp_


@ -146,7 +146,7 @@ void TChargedProp::execute(void)
std::vector<int> siteCoor; std::vector<int> siteCoor;
LOG(Message) << "Saving momentum-projected propagator to '" LOG(Message) << "Saving momentum-projected propagator to '"
<< RESULT_FILE_NAME(par().output) << "'..." << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
<< std::endl; << std::endl;
result.projection.resize(par().outputMom.size()); result.projection.resize(par().outputMom.size());
result.lattice_size = env().getGrid()->_fdimensions; result.lattice_size = env().getGrid()->_fdimensions;


@ -462,7 +462,7 @@ void TScalarVP::execute(void)
if (!par().output.empty()) if (!par().output.empty())
{ {
LOG(Message) << "Saving momentum-projected HVP to '" LOG(Message) << "Saving momentum-projected HVP to '"
<< RESULT_FILE_NAME(par().output) << "'..." << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
<< std::endl; << std::endl;
saveResult(par().output, "HVP", outputData); saveResult(par().output, "HVP", outputData);
} }


@ -239,7 +239,7 @@ void TVPCounterTerms::execute(void)
if (!par().output.empty()) if (!par().output.empty())
{ {
LOG(Message) << "Saving momentum-projected correlators to '" LOG(Message) << "Saving momentum-projected correlators to '"
<< RESULT_FILE_NAME(par().output) << "'..." << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
<< std::endl; << std::endl;
saveResult(par().output, "scalar_loops", outputData); saveResult(par().output, "scalar_loops", outputData);
} }


@ -0,0 +1,35 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MSolver/A2AAslashVectors.cc
Copyright (C) 2015-2018
Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MSolver;
template class Grid::Hadrons::MSolver::TA2AAslashVectors<FIMPL>;
template class Grid::Hadrons::MSolver::TA2AAslashVectors<ZFIMPL>;


@ -0,0 +1,194 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MSolver/A2AAslashVectors.hpp
Copyright (C) 2015-2018
Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MSolver_A2AAslashVectors_hpp_
#define Hadrons_MSolver_A2AAslashVectors_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/Solver.hpp>
#include <Hadrons/A2AVectors.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Create all-to-all V & W vectors *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSolver)
/****************************************************************************
* Calculate a sequential propagator on an insertion of i*g_mu*A_mu
* on an A2A vector
*
* vv_i(y) = S(y,x) * i * g_mu*A_mu(x) * v_i(x)
*
* with
*
* - vector: A2A vector v_i(x)
* - emField: A_mu(x): electromagnetic photon field
* - solver: the solver for calculating the sequential propagator
*
*****************************************************************************/
class A2AAslashVectorsPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashVectorsPar,
std::string, vector,
std::string, emField,
std::string, solver,
std::string, output,
bool, multiFile);
};
template <typename FImpl>
class TA2AAslashVectors : public Module<A2AAslashVectorsPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
SOLVER_TYPE_ALIASES(FImpl,);
public:
typedef PhotonR::GaugeField EmField;
public:
// constructor
TA2AAslashVectors(const std::string name);
// destructor
virtual ~TA2AAslashVectors(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
unsigned int Ls_;
};
MODULE_REGISTER_TMP(A2AAslashVectors, TA2AAslashVectors<FIMPL>, MSolver);
MODULE_REGISTER_TMP(ZA2AAslashVectors, TA2AAslashVectors<ZFIMPL>, MSolver);
/******************************************************************************
* TA2AAslashVectors implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TA2AAslashVectors<FImpl>::TA2AAslashVectors(const std::string name)
: Module<A2AAslashVectorsPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TA2AAslashVectors<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().vector, par().emField, par().solver};
return in;
}
template <typename FImpl>
std::vector<std::string> TA2AAslashVectors<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TA2AAslashVectors<FImpl>::setup(void)
{
Ls_ = env().getObjectLs(par().solver);
auto &vvector = envGet(std::vector<FermionField>, par().vector);
unsigned int Nmodes = vvector.size();
envCreate(std::vector<FermionField>, getName(), 1,
Nmodes, envGetGrid(FermionField));
envTmpLat(FermionField, "v4dtmp");
envTmpLat(FermionField, "v5dtmp", Ls_);
envTmpLat(FermionField, "v5dtmp_sol", Ls_);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TA2AAslashVectors<FImpl>::execute(void)
{
auto &solver = envGet(Solver, par().solver);
auto &stoch_photon = envGet(EmField, par().emField);
auto &vvector = envGet(std::vector<FermionField>, par().vector);
auto &Aslashv = envGet(std::vector<FermionField>, getName());
unsigned int Nmodes = vvector.size();
auto &mat = solver.getFMat();
envGetTmp(FermionField, v4dtmp);
envGetTmp(FermionField, v5dtmp);
envGetTmp(FermionField, v5dtmp_sol);
Complex ci(0.0,1.0);
startTimer("Seq Aslash");
LOG(Message) << "Calculate Sequential propagator on Aslash * v with the A2A vector "
<< par().vector << " and the photon field " << par().emField << std::endl;
for(unsigned int i=0; i<Nmodes; i++)
{
v4dtmp = zero;
startTimer("Multiply Aslash");
for(unsigned int mu=0;mu<=3;mu++)
{
Gamma gmu(Gamma::gmu[mu]);
v4dtmp += ci * PeekIndex<LorentzIndex>(stoch_photon, mu) * (gmu * vvector[i]);
}
stopTimer("Multiply Aslash");
startTimer("Inversion");
if (Ls_ == 1)
{
solver(Aslashv[i], v4dtmp);
}
else
{
mat.ImportPhysicalFermionSource(v4dtmp, v5dtmp);
solver(v5dtmp_sol, v5dtmp);
mat.ExportPhysicalFermionSolution(v5dtmp_sol, v4dtmp);
Aslashv[i] = v4dtmp;
}
stopTimer("Inversion");
}
stopTimer("Seq Aslash");
if (!par().output.empty())
{
startTimer("I/O");
A2AVectorsIo::write(par().output, Aslashv, par().multiFile, vm().getTrajectory());
stopTimer("I/O");
}
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MSolver_A2AAslashVectors_hpp_


@ -51,7 +51,9 @@ public:
std::string, noise, std::string, noise,
std::string, action, std::string, action,
std::string, eigenPack, std::string, eigenPack,
std::string, solver); std::string, solver,
std::string, output,
bool, multiFile);
}; };
template <typename FImpl, typename Pack> template <typename FImpl, typename Pack>
@ -236,6 +238,17 @@ void TA2AVectors<FImpl, Pack>::execute(void)
} }
stopTimer("W high mode"); stopTimer("W high mode");
} }
// I/O if necessary
if (!par().output.empty())
{
startTimer("V I/O");
A2AVectorsIo::write(par().output + "_v", v, par().multiFile, vm().getTrajectory());
stopTimer("V I/O");
startTimer("W I/O");
A2AVectorsIo::write(par().output + "_w", w, par().multiFile, vm().getTrajectory());
stopTimer("W I/O");
}
} }
END_MODULE_NAMESPACE END_MODULE_NAMESPACE


@ -33,4 +33,7 @@ using namespace MSolver;
template class Grid::Hadrons::MSolver::TLocalCoherenceLanczos<FIMPL,HADRONS_DEFAULT_LANCZOS_NBASIS>; template class Grid::Hadrons::MSolver::TLocalCoherenceLanczos<FIMPL,HADRONS_DEFAULT_LANCZOS_NBASIS>;
template class Grid::Hadrons::MSolver::TLocalCoherenceLanczos<ZFIMPL,HADRONS_DEFAULT_LANCZOS_NBASIS>; template class Grid::Hadrons::MSolver::TLocalCoherenceLanczos<ZFIMPL,HADRONS_DEFAULT_LANCZOS_NBASIS>;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
template class Grid::Hadrons::MSolver::TLocalCoherenceLanczos<FIMPL,HADRONS_DEFAULT_LANCZOS_NBASIS, FIMPLF>;
template class Grid::Hadrons::MSolver::TLocalCoherenceLanczos<ZFIMPL,HADRONS_DEFAULT_LANCZOS_NBASIS, ZFIMPLF>;
#endif


@ -55,7 +55,7 @@ public:
bool, multiFile); bool, multiFile);
}; };
template <typename FImpl, int nBasis> template <typename FImpl, int nBasis, typename FImplIo = FImpl>
class TLocalCoherenceLanczos: public Module<LocalCoherenceLanczosPar> class TLocalCoherenceLanczos: public Module<LocalCoherenceLanczosPar>
{ {
public: public:
@ -64,7 +64,7 @@ public:
typename FImpl::SiteComplex, typename FImpl::SiteComplex,
nBasis> LCL; nBasis> LCL;
typedef BaseFermionEigenPack<FImpl> BasePack; typedef BaseFermionEigenPack<FImpl> BasePack;
typedef CoarseFermionEigenPack<FImpl, nBasis> CoarsePack; typedef CoarseFermionEigenPack<FImpl, nBasis, FImplIo> CoarsePack;
typedef HADRONS_DEFAULT_SCHUR_OP<FMat, FermionField> SchurFMat; typedef HADRONS_DEFAULT_SCHUR_OP<FMat, FermionField> SchurFMat;
public: public:
// constructor // constructor
@ -82,27 +82,31 @@ public:
MODULE_REGISTER_TMP(LocalCoherenceLanczos, ARG(TLocalCoherenceLanczos<FIMPL, HADRONS_DEFAULT_LANCZOS_NBASIS>), MSolver); MODULE_REGISTER_TMP(LocalCoherenceLanczos, ARG(TLocalCoherenceLanczos<FIMPL, HADRONS_DEFAULT_LANCZOS_NBASIS>), MSolver);
MODULE_REGISTER_TMP(ZLocalCoherenceLanczos, ARG(TLocalCoherenceLanczos<ZFIMPL, HADRONS_DEFAULT_LANCZOS_NBASIS>), MSolver); MODULE_REGISTER_TMP(ZLocalCoherenceLanczos, ARG(TLocalCoherenceLanczos<ZFIMPL, HADRONS_DEFAULT_LANCZOS_NBASIS>), MSolver);
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
MODULE_REGISTER_TMP(LocalCoherenceLanczosIo32, ARG(TLocalCoherenceLanczos<FIMPL, HADRONS_DEFAULT_LANCZOS_NBASIS, FIMPLF>), MSolver);
MODULE_REGISTER_TMP(ZLocalCoherenceLanczosIo32, ARG(TLocalCoherenceLanczos<ZFIMPL, HADRONS_DEFAULT_LANCZOS_NBASIS, ZFIMPLF>), MSolver);
#endif
/****************************************************************************** /******************************************************************************
* TLocalCoherenceLanczos implementation * * TLocalCoherenceLanczos implementation *
******************************************************************************/ ******************************************************************************/
// constructor ///////////////////////////////////////////////////////////////// // constructor /////////////////////////////////////////////////////////////////
template <typename FImpl, int nBasis> template <typename FImpl, int nBasis, typename FImplIo>
TLocalCoherenceLanczos<FImpl, nBasis>::TLocalCoherenceLanczos(const std::string name) TLocalCoherenceLanczos<FImpl, nBasis, FImplIo>::TLocalCoherenceLanczos(const std::string name)
: Module<LocalCoherenceLanczosPar>(name) : Module<LocalCoherenceLanczosPar>(name)
{} {}
// dependencies/products /////////////////////////////////////////////////////// // dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl, int nBasis> template <typename FImpl, int nBasis, typename FImplIo>
std::vector<std::string> TLocalCoherenceLanczos<FImpl, nBasis>::getInput(void) std::vector<std::string> TLocalCoherenceLanczos<FImpl, nBasis, FImplIo>::getInput(void)
{ {
std::vector<std::string> in = {par().action}; std::vector<std::string> in = {par().action};
return in; return in;
} }
template <typename FImpl, int nBasis> template <typename FImpl, int nBasis, typename FImplIo>
std::vector<std::string> TLocalCoherenceLanczos<FImpl, nBasis>::getOutput(void) std::vector<std::string> TLocalCoherenceLanczos<FImpl, nBasis, FImplIo>::getOutput(void)
{ {
std::vector<std::string> out = {getName()}; std::vector<std::string> out = {getName()};
@ -110,8 +114,8 @@ std::vector<std::string> TLocalCoherenceLanczos<FImpl, nBasis>::getOutput(void)
} }
// setup /////////////////////////////////////////////////////////////////////// // setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl, int nBasis> template <typename FImpl, int nBasis, typename FImplIo>
void TLocalCoherenceLanczos<FImpl, nBasis>::setup(void) void TLocalCoherenceLanczos<FImpl, nBasis, FImplIo>::setup(void)
{ {
LOG(Message) << "Setting up local coherence Lanczos eigensolver for" LOG(Message) << "Setting up local coherence Lanczos eigensolver for"
<< " action '" << par().action << "' (" << nBasis << " action '" << par().action << "' (" << nBasis
@ -138,8 +142,8 @@ void TLocalCoherenceLanczos<FImpl, nBasis>::setup(void)
} }
// execution /////////////////////////////////////////////////////////////////// // execution ///////////////////////////////////////////////////////////////////
template <typename FImpl, int nBasis> template <typename FImpl, int nBasis, typename FImplIo>
void TLocalCoherenceLanczos<FImpl, nBasis>::execute(void) void TLocalCoherenceLanczos<FImpl, nBasis, FImplIo>::execute(void)
{ {
auto &finePar = par().fineParams; auto &finePar = par().fineParams;
auto &coarsePar = par().coarseParams; auto &coarsePar = par().coarseParams;


@ -0,0 +1,454 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Utilities/Contractor.cc
Copyright (C) 2015-2018
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Global.hpp>
#include <Hadrons/A2AMatrix.hpp>
#include <Hadrons/DiskVector.hpp>
#include <Hadrons/TimerArray.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
namespace Contractor
{
class TrajRange: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
unsigned int, start,
unsigned int, end,
unsigned int, step);
};
class GlobalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
TrajRange, trajCounter,
unsigned int, nt,
std::string, diskVectorDir,
std::string, output);
};
class A2AMatrixPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
std::string, file,
std::string, dataset,
unsigned int, cacheSize,
std::string, name);
};
class ProductPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
std::string, terms,
std::vector<std::string>, times,
std::string, translations,
bool, translationAverage);
};
class CorrelatorResult: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
std::vector<Contractor::A2AMatrixPar>, a2aMatrix,
ProductPar, contraction,
std::vector<unsigned int>, times,
std::vector<ComplexD>, correlator);
};
}
struct ContractorPar
{
Contractor::GlobalPar global;
std::vector<Contractor::A2AMatrixPar> a2aMatrix;
std::vector<Contractor::ProductPar> product;
};
void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
const std::vector<std::set<unsigned int>> &times,
std::vector<unsigned int> &current,
const unsigned int depth)
{
if (depth > 0)
{
for (auto t: times[times.size() - depth])
{
current[times.size() - depth] = t;
makeTimeSeq(timeSeq, times, current, depth - 1);
}
}
else
{
timeSeq.push_back(current);
}
}
void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
const std::vector<std::set<unsigned int>> &times)
{
std::vector<unsigned int> current(times.size());
makeTimeSeq(timeSeq, times, current, times.size());
}
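The recursion enumerates the Cartesian product of the per-position time sets, e.g.:

std::vector<std::vector<unsigned int>> seq;

makeTimeSeq(seq, {{0, 1}, {2}});
// seq == {{0, 2}, {1, 2}}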
void saveCorrelator(const Contractor::CorrelatorResult &result, const std::string dir,
const unsigned int dt, const unsigned int traj)
{
std::string fileStem = "", filename;
std::vector<std::string> terms = strToVec<std::string>(result.contraction.terms);
for (unsigned int i = 0; i < terms.size() - 1; i++)
{
fileStem += terms[i] + "_" + std::to_string(result.times[i]) + "_";
}
fileStem += terms.back();
if (!result.contraction.translationAverage)
{
fileStem += "_dt_" + std::to_string(dt);
}
filename = dir + "/" + RESULT_FILE_NAME(fileStem, traj);
std::cout << "Saving correlator to '" << filename << "'" << std::endl;
makeFileDir(dir);
ResultWriter writer(filename);
write(writer, fileStem, result);
}
std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
{
std::regex rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))");
std::smatch sm;
std::vector<std::string> rstr = strToVec<std::string>(str);
std::set<unsigned int> tSet;
for (auto &s: rstr)
{
std::regex_match(s, sm, rex);
if (sm[1].matched)
{
unsigned int t;
t = std::stoi(sm[1].str());
if (t >= nt)
{
HADRONS_ERROR(Range, "time out of range (from expression '" + str + "')");
}
tSet.insert(t);
}
else if (sm[2].matched)
{
unsigned int ta, tb;
ta = std::stoi(sm[3].str());
tb = std::stoi(sm[4].str());
if ((ta >= nt) or (tb >= nt))
{
HADRONS_ERROR(Range, "time out of range (from expression '" + str + "')");
}
for (unsigned int ti = ta; ti <= tb; ++ti)
{
tSet.insert(ti);
}
}
}
return tSet;
}
struct Sec
{
Sec(const double usec)
{
seconds = usec/1.0e6;
}
double seconds;
};
inline std::ostream & operator<< (std::ostream& s, const Sec &&sec)
{
s << std::setw(10) << sec.seconds << " sec";
return s;
}
struct Flops
{
Flops(const double flops, const double fusec)
{
gFlopsPerSec = flops/fusec/1.0e3;
}
double gFlopsPerSec;
};
inline std::ostream & operator<< (std::ostream& s, const Flops &&f)
{
s << std::setw(10) << f.gFlopsPerSec << " GFlop/s";
return s;
}
struct Bytes
{
Bytes(const double bytes, const double busec)
{
gBytesPerSec = bytes/busec*1.0e6/1024/1024/1024;
}
double gBytesPerSec;
};
inline std::ostream & operator<< (std::ostream& s, const Bytes &&b)
{
s << std::setw(10) << b.gBytesPerSec << " GB/s";
return s;
}
int main(int argc, char* argv[])
{
// parse command line
std::string parFilename;
if (argc != 2)
{
std::cerr << "usage: " << argv[0] << " <parameter file>";
std::cerr << std::endl;
return EXIT_FAILURE;
}
parFilename = argv[1];
// parse parameter file
ContractorPar par;
unsigned int nMat, nCont;
XmlReader reader(parFilename);
read(reader, "global", par.global);
read(reader, "a2aMatrix", par.a2aMatrix);
read(reader, "product", par.product);
nMat = par.a2aMatrix.size();
nCont = par.product.size();
// create diskvectors
std::map<std::string, EigenDiskVector<ComplexD>> a2aMat;
unsigned int cacheSize;
for (auto &p: par.a2aMatrix)
{
std::string dirName = par.global.diskVectorDir + "/" + p.name;
a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
}
// trajectory loop
for (unsigned int traj = par.global.trajCounter.start;
traj < par.global.trajCounter.end; traj += par.global.trajCounter.step)
{
std::cout << ":::::::: Trajectory " << traj << std::endl;
// load data
for (auto &p: par.a2aMatrix)
{
std::string filename = p.file;
double t, size;
tokenReplace(filename, "traj", traj);
std::cout << "======== Loading '" << filename << "'" << std::endl;
A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
a2aIo.load(a2aMat.at(p.name), &t);
std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6
<< " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
}
// contract
EigenDiskVector<ComplexD>::Matrix buf;
for (auto &p: par.product)
{
std::vector<std::string> term = strToVec<std::string>(p.terms);
std::vector<std::set<unsigned int>> times;
std::vector<std::vector<unsigned int>> timeSeq;
std::set<unsigned int> translations;
std::vector<A2AMatrixTr<ComplexD>> lastTerm(par.global.nt);
A2AMatrix<ComplexD> prod, buf, tmp;
TimerArray tAr;
double fusec, busec, flops, bytes, tusec;
Contractor::CorrelatorResult result;
tAr.startTimer("Total");
std::cout << "======== Contraction tr(";
for (unsigned int g = 0; g < term.size(); ++g)
{
std::cout << term[g] << ((g == term.size() - 1) ? ')' : '*');
}
std::cout << std::endl;
if (term.size() != p.times.size() + 1)
{
HADRONS_ERROR(Size, "number of terms (" + std::to_string(term.size())
+ ") different from number of times ("
+ std::to_string(p.times.size() + 1) + ")");
}
for (auto &s: p.times)
{
times.push_back(parseTimeRange(s, par.global.nt));
}
for (auto &m: par.a2aMatrix)
{
if (std::find(result.a2aMatrix.begin(), result.a2aMatrix.end(), m) == result.a2aMatrix.end())
{
result.a2aMatrix.push_back(m);
tokenReplace(result.a2aMatrix.back().file, "traj", traj);
}
}
result.contraction = p;
result.correlator.resize(par.global.nt, 0.);
translations = parseTimeRange(p.translations, par.global.nt);
makeTimeSeq(timeSeq, times);
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
<< timeSeq.size()*translations.size()*par.global.nt << " tr(A*B)"
<< std::endl;
std::cout << "* Caching transposed last term" << std::endl;
for (unsigned int t = 0; t < par.global.nt; ++t)
{
tAr.startTimer("Disk vector overhead");
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
tAr.stopTimer("Disk vector overhead");
tAr.startTimer("Transpose caching");
lastTerm[t].resize(ref.rows(), ref.cols());
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
for (unsigned int i = 0; i < ref.rows(); ++i)
{
lastTerm[t](i, j) = ref(i, j);
}
tAr.stopTimer("Transpose caching");
}
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
for (unsigned int i = 0; i < timeSeq.size(); ++i)
{
unsigned int dti = 0;
auto &t = timeSeq[i];
result.times = t;
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
result.correlator[tLast] = 0.;
}
for (auto &dt: translations)
{
std::cout << "* Step " << i*translations.size() + dti + 1
<< "/" << timeSeq.size()*translations.size()
<< " -- positions= " << t << ", dt= " << dt << std::endl;
if (term.size() > 2)
{
std::cout << std::setw(8) << "products";
}
flops = 0.;
bytes = 0.;
fusec = tAr.getDTimer("A*B algebra");
busec = tAr.getDTimer("A*B total");
tAr.startTimer("Linear algebra");
tAr.startTimer("Disk vector overhead");
prod = a2aMat.at(term[0])[TIME_MOD(t[0] + dt)];
tAr.stopTimer("Disk vector overhead");
for (unsigned int j = 1; j < term.size() - 1; ++j)
{
tAr.startTimer("Disk vector overhead");
const A2AMatrix<ComplexD> &ref = a2aMat.at(term[j])[TIME_MOD(t[j] + dt)];
tAr.stopTimer("Disk vector overhead");
tAr.startTimer("A*B total");
tAr.startTimer("A*B algebra");
A2AContraction::mul(tmp, prod, ref);
tAr.stopTimer("A*B algebra");
flops += A2AContraction::mulFlops(prod, ref);
prod = tmp;
tAr.stopTimer("A*B total");
bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
}
if (term.size() > 2)
{
std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
<< Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " "
<< Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
}
std::cout << std::setw(8) << "traces";
flops = 0.;
bytes = 0.;
fusec = tAr.getDTimer("tr(A*B)");
busec = tAr.getDTimer("tr(A*B)");
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
tAr.startTimer("tr(A*B)");
A2AContraction::accTrMul(result.correlator[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
tAr.stopTimer("tr(A*B)");
flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
}
tAr.stopTimer("Linear algebra");
std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
<< Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " "
<< Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
if (!p.translationAverage)
{
saveCorrelator(result, par.global.output, dt, traj);
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
result.correlator[tLast] = 0.;
}
}
dti++;
}
if (p.translationAverage)
{
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
result.correlator[tLast] /= translations.size();
}
saveCorrelator(result, par.global.output, 0, traj);
}
}
tAr.stopTimer("Total");
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
}
}
return EXIT_SUCCESS;
}
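For reference, a minimal parameter file sketch consistent with the serialisable structures above (all values hypothetical; the <grid> root element and the <elem> tags for vector members assume Grid's usual XML serialisation conventions, and the file name may carry a trajectory token, written @traj@ in Hadrons' token convention, which is substituted by tokenReplace at every trajectory):

<?xml version="1.0"?>
<grid>
  <global>
    <trajCounter>
      <start>1000</start>
      <end>1040</end>
      <step>40</step>
    </trajCounter>
    <nt>64</nt>
    <diskVectorDir>./diskvector</diskVectorDir>
    <output>./correlators</output>
  </global>
  <a2aMatrix>
    <elem>
      <file>mesonField.@traj@.h5</file>
      <dataset>meson</dataset>
      <cacheSize>1</cacheSize>
      <name>pion</name>
    </elem>
  </a2aMatrix>
  <product>
    <elem>
      <terms>pion pion</terms>
      <times>
        <elem>0</elem>
      </times>
      <translations>0..63</translations>
      <translationAverage>true</translationAverage>
    </elem>
  </product>
</grid>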

View File

@ -0,0 +1,12 @@
#ifndef Hadrons_Contractor_hpp_
#define Hadrons_Contractor_hpp_
#include <Hadrons/Global.hpp>
BEGIN_HADRONS_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Contractor_hpp_

View File

@ -0,0 +1,434 @@
#include <Hadrons/Global.hpp>
#include <Hadrons/A2AMatrix.hpp>
#ifdef USE_MKL
#include "mkl.h"
#include "mkl_cblas.h"
#endif
using namespace Grid;
using namespace Hadrons;
#ifdef GRID_COMMS_MPI3
#define GET_RANK(rank, nMpi) \
MPI_Comm_size(MPI_COMM_WORLD, &(nMpi));\
MPI_Comm_rank(MPI_COMM_WORLD, &(rank))
#define BARRIER() MPI_Barrier(MPI_COMM_WORLD)
#define INIT() MPI_Init(NULL, NULL)
#define FINALIZE() MPI_Finalize()
#else
#define GET_RANK(rank, nMpi) (nMpi) = 1; (rank) = 0
#define BARRIER()
#define INIT()
#define FINALIZE()
#endif
template <typename Function, typename MatLeft, typename MatRight>
inline void trBenchmark(const std::string name, const MatLeft &left,
const MatRight &right, const ComplexD ref, Function fn)
{
double t, flops, bytes, n = left[0].rows()*left[0].cols();
unsigned int nMat = left.size();
int nMpi, rank;
ComplexD buf;
t = 0.;
GET_RANK(rank, nMpi);
t = -usecond();
BARRIER();
for (unsigned int i = rank*nMat/nMpi; i < (rank+1)*nMat/nMpi; ++i)
{
fn(buf, left[i], right[i]);
}
BARRIER();
t += usecond();
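// flop count per trace: n complex multiplies at 6 flops each plus
// n - 1 complex additions at 2 flops each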
flops = nMat*(6.*n + 2.*(n - 1.));
bytes = nMat*(2.*n*sizeof(ComplexD));
if (rank == 0)
{
std::cout << std::setw(34) << name << ": diff= "
<< std::setw(12) << std::norm(buf-ref)
<< std::setw(10) << t/1.0e6 << " sec "
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
<< std::endl;
}
::sleep(1);
}
template <typename Function, typename MatV, typename Mat>
inline void mulBenchmark(const std::string name, const MatV &left,
const MatV &right, const Mat &ref, Function fn)
{
double t, flops, bytes;
double nr = left[0].rows(), nc = left[0].cols(), n = nr*nc;
unsigned int nMat = left.size();
int nMpi, rank;
Mat buf(left[0].rows(), left[0].rows());
t = 0.;
GET_RANK(rank, nMpi);
t = -usecond();
BARRIER();
for (unsigned int i = rank*nMat/nMpi; i < (rank+1)*nMat/nMpi; ++i)
{
fn(buf, left[i], right[i]);
}
BARRIER();
t += usecond();
flops = nMat*(nr*nr*(6.*nc + 2.*(nc - 1.)));
bytes = nMat*(2*nc*nr*sizeof(ComplexD));
if (rank == 0)
{
std::cout << std::setw(34) << name << ": diff= "
<< std::setw(12) << (buf-ref).squaredNorm()
<< std::setw(10) << t/1.0e6 << " sec "
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
<< std::endl;
}
::sleep(1);
}
#ifdef USE_MKL
template <typename MatLeft, typename MatRight>
static inline void zdotuRow(ComplexD &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aRow*a.cols();
aInc = 1;
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aRow;
aInc = a.rows();
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aRow;
bInc = b.cols();
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aRow*b.rows();
bInc = 1;
}
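// start pointer and stride of each operand are chosen from its storage order,
// so zdotu walks row aRow of a against column aRow of b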
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void zdotuCol(ComplexD &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aCol;
aInc = a.cols();
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aCol*a.rows();
aInc = 1;
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aCol*b.cols();
bInc = 1;
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aCol;
bInc = b.rows();
}
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
#endif
template <typename MatLeft, typename MatRight>
void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
{
std::vector<MatLeft> left;
std::vector<MatRight> right;
MatRight buf;
ComplexD ref;
int rank, nMpi;
left.resize(nMat, MatLeft::Random(ni, nj));
right.resize(nMat, MatRight::Random(nj, ni));
GET_RANK(rank, nMpi);
if (rank == 0)
{
std::cout << "==== tr(A*B) benchmarks" << std::endl;
std::cout << "A matrices use ";
if (MatLeft::Options == Eigen::RowMajor)
{
std::cout << "row-major ordering" << std::endl;
}
else if (MatLeft::Options == Eigen::ColMajor)
{
std::cout << "col-major ordering" << std::endl;
}
std::cout << "B matrices use ";
if (MatRight::Options == Eigen::RowMajor)
{
std::cout << "row-major ordering" << std::endl;
}
else if (MatRight::Options == Eigen::ColMajor)
{
std::cout << "col-major ordering" << std::endl;
}
std::cout << std::endl;
}
BARRIER();
ref = (left.back()*right.back()).trace();
trBenchmark("Hadrons A2AContraction::accTrMul", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
A2AContraction::accTrMul(res, a, b);
});
trBenchmark("Naive loop rows first", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
auto nr = a.rows(), nc = a.cols();
res = 0.;
parallel_for (unsigned int i = 0; i < nr; ++i)
{
ComplexD tmp = 0.;
for (unsigned int j = 0; j < nc; ++j)
{
tmp += a(i, j)*b(j, i);
}
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Naive loop cols first", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
auto nr = a.rows(), nc = a.cols();
res = 0.;
parallel_for (unsigned int j = 0; j < nc; ++j)
{
ComplexD tmp = 0.;
for (unsigned int i = 0; i < nr; ++i)
{
tmp += a(i, j)*b(j, i);
}
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Eigen tr(A*B)", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = (a*b).trace();
});
trBenchmark("Eigen row-wise dot", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
{
ComplexD tmp;
tmp = a.row(r).conjugate().dot(b.col(r));
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Eigen col-wise dot", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
{
ComplexD tmp;
tmp = a.col(c).conjugate().dot(b.row(c));
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Eigen Hadamard", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = a.cwiseProduct(b.transpose()).sum();
});
#ifdef USE_MKL
trBenchmark("MKL row-wise zdotu", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
{
ComplexD tmp;
zdotuRow(tmp, r, a, b);
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("MKL col-wise zdotu", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
{
ComplexD tmp;
zdotuCol(tmp, c, a, b);
parallel_critical
{
res += tmp;
}
}
});
#endif
BARRIER();
if (rank == 0)
{
std::cout << std::endl;
}
}
template <typename Mat>
void fullMulBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
{
std::vector<Mat> left, right;
Mat ref;
int rank, nMpi;
left.resize(nMat, Mat::Random(ni, nj));
right.resize(nMat, Mat::Random(nj, ni));
GET_RANK(rank, nMpi);
if (rank == 0)
{
std::cout << "==== A*B benchmarks" << std::endl;
std::cout << "all matrices use ";
if (Mat::Options == Eigen::RowMajor)
{
std::cout << "row-major ordering" << std::endl;
}
else if (Mat::Options == Eigen::ColMajor)
{
std::cout << "col-major ordering" << std::endl;
}
std::cout << std::endl;
}
BARRIER();
ref = left.back()*right.back();
mulBenchmark("Hadrons A2AContraction::mul", left, right, ref,
[](Mat &res, const Mat &a, const Mat &b)
{
A2AContraction::mul(res, a, b);
});
mulBenchmark("Eigen A*B", left, right, ref,
[](Mat &res, const Mat &a, const Mat &b)
{
res = a*b;
});
#ifdef USE_MKL
mulBenchmark("MKL A*B", left, right, ref,
[](Mat &res, const Mat &a, const Mat &b)
{
const ComplexD one(1., 0.), zero(0., 0.);
if (Mat::Options == Eigen::RowMajor)
{
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat::Options == Eigen::ColMajor)
{
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
});
#endif
BARRIER();
if (rank == 0)
{
std::cout << std::endl;
}
}
int main(int argc, char *argv[])
{
// parse command line
Eigen::Index ni, nj, nMat;
int nMpi, rank;
if (argc != 4)
{
std::cerr << "usage: " << argv[0] << " <Ni> <Nj> <#matrices>";
std::cerr << std::endl;
return EXIT_FAILURE;
}
ni = std::stoi(argv[1]);
nj = std::stoi(argv[2]);
nMat = std::stoi(argv[3]);
INIT();
GET_RANK(rank, nMpi);
if (rank == 0)
{
std::cout << "\n*** ALL-TO-ALL MATRIX CONTRACTION BENCHMARK ***\n" << std::endl;
std::cout << nMat << " couples of " << ni << "x" << nj << " matrices\n" << std::endl;
std::cout << nMpi << " MPI processes" << std::endl;
#ifdef GRID_OMP
#pragma omp parallel
{
#pragma omp single
std::cout << omp_get_num_threads() << " threads\n" << std::endl;
}
#else
std::cout << "Single-threaded\n" << std::endl;
#endif
#ifdef EIGEN_USE_MKL_ALL
std::cout << "Eigen uses the MKL" << std::endl;
#endif
std::cout << "Eigen uses " << Eigen::nbThreads() << " threads" << std::endl;
#ifdef USE_MKL
std::cout << "MKL uses " << mkl_get_max_threads() << " threads" << std::endl;
#endif
std::cout << std::endl;
}
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
fullMulBenchmark<A2AMatrix<ComplexD>>(ni, nj, nMat);
fullMulBenchmark<A2AMatrixTr<ComplexD>>(ni, nj, nMat);
FINALIZE();
return EXIT_SUCCESS;
}
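A typical invocation (hypothetical sizes; the mpirun prefix only applies to builds with GRID_COMMS_MPI3):

    mpirun -n 2 ./HadronsContractorBenchmark 512 512 10

This times tr(A*B) for all four combinations of row- and column-major operands, and A*B for both orderings, on 10 pairs of 512x512 complex matrices.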

View File

@ -35,7 +35,7 @@ using namespace Hadrons;
 template <typename FOut, typename FIn>
 void convert(const std::string outFilename, const std::string inFilename,
              const unsigned int Ls, const bool rb, const unsigned int size,
-             const bool multiFile)
+             const bool multiFile, const bool testRead)
 {
     assert(outFilename != inFilename);
@ -102,6 +102,7 @@ void convert(const std::string outFilename, const std::string inFilename,
     LOG(Message) << "Out type  : " << typeName<FOut>() << std::endl;
     LOG(Message) << "#vectors  : " << size << std::endl;
     LOG(Message) << "Multifile : " << (multiFile ? "yes" : "no") << std::endl;
+    LOG(Message) << "Test read : " << (testRead ? "yes" : "no") << std::endl;
     if (multiFile)
     {
         for(unsigned int k = 0; k < size; ++k)
@ -112,6 +113,8 @@ void convert(const std::string outFilename, const std::string inFilename,
             LOG(Message) << "==== Converting vector " << k << std::endl;
             LOG(Message) << "In : " << inV  << std::endl;
             LOG(Message) << "Out: " << outV << std::endl;
+            // conversion
+            LOG(Message) << "-- Doing conversion" << std::endl;
             makeFileDir(outV, gOut);
             binWriter.open(outV);
             binReader.open(inV);
@ -121,10 +124,20 @@ void convert(const std::string outFilename, const std::string inFilename,
             EigenPackIo::writeElement<FIn, FOut>(binWriter, bufIn, eval, k, &bufOut, &testIn);
             binWriter.close();
             binReader.close();
+            // read test
+            if (testRead)
+            {
+                LOG(Message) << "-- Test read" << std::endl;
+                binReader.open(outV);
+                EigenPackIo::readElement<FOut>(bufOut, eval, k, binReader);
+                binReader.close();
+            }
         }
     }
     else
     {
+        // conversion
+        LOG(Message) << "-- Doing conversion" << std::endl;
         makeFileDir(outFilename, gOut);
         binWriter.open(outFilename);
         binReader.open(inFilename);
@ -137,6 +150,18 @@ void convert(const std::string outFilename, const std::string inFilename,
         }
         binWriter.close();
         binReader.close();
+        // read test
+        if (testRead)
+        {
+            LOG(Message) << "-- Test read" << std::endl;
+            binReader.open(outFilename);
+            EigenPackIo::readHeader(record, binReader);
+            for(unsigned int k = 0; k < size; ++k)
+            {
+                EigenPackIo::readElement<FOut>(bufOut, eval, k, binReader);
+            }
+            binReader.close();
+        }
     }
 }
@ -154,11 +179,11 @@ int main(int argc, char *argv[])
     // parse command line
     std::string  outFilename, inFilename;
     unsigned int size, Ls;
-    bool         rb, multiFile;
+    bool         rb, multiFile, testRead;

-    if (argc < 7)
+    if (argc < 8)
     {
-        std::cerr << "usage: " << argv[0] << " <out eigenpack> <in eigenpack> <Ls> <red-black (0|1)> <#vector> <multifile (0|1)> [Grid options]";
+        std::cerr << "usage: " << argv[0] << " <out eigenpack> <in eigenpack> <Ls> <red-black {0|1}> <#vector> <multifile {0|1}> <test read {0|1}> [Grid options]";
         std::cerr << std::endl;
         std::exit(EXIT_FAILURE);
     }
@ -168,6 +193,7 @@ int main(int argc, char *argv[])
     rb        = (std::string(argv[4]) != "0");
     size      = std::stoi(std::string(argv[5]));
     multiFile = (std::string(argv[6]) != "0");
+    testRead  = (std::string(argv[7]) != "0");
     // initialization
     Grid_init(&argc, &argv);
@ -176,7 +202,7 @@ int main(int argc, char *argv[])
     // execution
     try
     {
-        convert<FOUT, FIN>(outFilename, inFilename, Ls, rb, size, multiFile);
+        convert<FOUT, FIN>(outFilename, inFilename, Ls, rb, size, multiFile, testRead);
     }
     catch (const std::exception& e)
     {

View File

@ -1,4 +1,4 @@
-bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32
+bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32 HadronsContractor HadronsContractorBenchmark

 HadronsXmlRun_SOURCES = HadronsXmlRun.cc
 HadronsXmlRun_LDADD   = ../libHadrons.a ../../Grid/libGrid.a
@ -6,3 +6,9 @@ HadronsXmlRun_LDADD   = ../libHadrons.a ../../Grid/libGrid.a
 HadronsFermionEP64To32_SOURCES  = EigenPackCast.cc
 HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
 HadronsFermionEP64To32_LDADD    = ../libHadrons.a ../../Grid/libGrid.a
+
+HadronsContractor_SOURCES = Contractor.cc
+HadronsContractor_LDADD   = ../libHadrons.a ../../Grid/libGrid.a
+
+HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc
+HadronsContractorBenchmark_LDADD   = ../libHadrons.a ../../Grid/libGrid.a

View File

@ -20,17 +20,20 @@ modules_cc =\
   Modules/MSink/Point.cc \
   Modules/MSink/Smear.cc \
   Modules/MSolver/A2AVectors.cc \
+  Modules/MSolver/A2AAslashVectors.cc \
   Modules/MSolver/RBPrecCG.cc \
   Modules/MSolver/MixedPrecisionRBPrecCG.cc \
   Modules/MSolver/LocalCoherenceLanczos.cc \
   Modules/MGauge/StoutSmearing.cc \
   Modules/MGauge/Unit.cc \
+  Modules/MGauge/Electrify.cc \
   Modules/MGauge/UnitEm.cc \
   Modules/MGauge/StochEm.cc \
   Modules/MGauge/Random.cc \
   Modules/MGauge/FundtoHirep.cc \
   Modules/MGauge/GaugeFix.cc \
   Modules/MNoise/TimeDilutedSpinColorDiagonal.cc \
+  Modules/MNoise/FullVolumeSpinColorDiagonal.cc \
   Modules/MUtilities/RandomVectors.cc \
   Modules/MUtilities/TestSeqGamma.cc \
   Modules/MUtilities/PrecisionCast.cc \
@ -64,7 +67,8 @@ modules_cc =\
   Modules/MIO/LoadBinary.cc \
   Modules/MIO/LoadNersc.cc \
   Modules/MIO/LoadCoarseEigenPack.cc \
-  Modules/MIO/LoadCosmHol.cc
+  Modules/MIO/LoadCosmHol.cc \
+  Modules/MIO/LoadA2AVectors.cc

 modules_hpp =\
   Modules/MContraction/Baryon.hpp \
@ -93,14 +97,17 @@ modules_hpp =\
   Modules/MSolver/Guesser.hpp \
   Modules/MSolver/RBPrecCG.hpp \
   Modules/MSolver/A2AVectors.hpp \
+  Modules/MSolver/A2AAslashVectors.hpp \
   Modules/MGauge/UnitEm.hpp \
   Modules/MGauge/StoutSmearing.hpp \
   Modules/MGauge/Unit.hpp \
+  Modules/MGauge/Electrify.hpp \
   Modules/MGauge/Random.hpp \
   Modules/MGauge/GaugeFix.hpp \
   Modules/MGauge/FundtoHirep.hpp \
   Modules/MGauge/StochEm.hpp \
   Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp \
+  Modules/MNoise/FullVolumeSpinColorDiagonal.hpp \
   Modules/MUtilities/PrecisionCast.hpp \
   Modules/MUtilities/RandomVectors.hpp \
   Modules/MUtilities/TestSeqGamma.hpp \
@ -134,6 +141,7 @@ modules_hpp =\
   Modules/MScalarSUN/TrKinetic.hpp \
   Modules/MIO/LoadEigenPack.hpp \
   Modules/MIO/LoadNersc.hpp \
+  Modules/MIO/LoadA2AVectors.hpp \
   Modules/MIO/LoadCosmHol.hpp \
   Modules/MIO/LoadCoarseEigenPack.hpp \
   Modules/MIO/LoadBinary.hpp

View File

@ -3,9 +3,6 @@
 #define MSG std::cout << GridLogMessage
 #define SEP \
 "============================================================================="
-#ifndef BENCH_IO_LMAX
-#define BENCH_IO_LMAX 40
-#endif

 using namespace Grid;
 using namespace QCD;
@ -41,7 +38,7 @@ int main (int argc, char ** argv)
   int64_t threads = GridThread::GetThreads();
   MSG << "Grid is setup to use " << threads << " threads" << std::endl;
   MSG << SEP << std::endl;
-  MSG << "Benchmark Lime write" << std::endl;
+  MSG << "Benchmark double precision Lime write" << std::endl;
   MSG << SEP << std::endl;
   for (auto &d: dir)
   {
@ -49,7 +46,8 @@ int main (int argc, char ** argv)
     writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermion>, Ls, rb);
   }

-  MSG << "Benchmark Lime read" << std::endl;
+  MSG << SEP << std::endl;
+  MSG << "Benchmark double precision Lime read" << std::endl;
   MSG << SEP << std::endl;
   for (auto &d: dir)
   {
@ -57,6 +55,24 @@ int main (int argc, char ** argv)
     readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermion>, Ls, rb);
   }
+  MSG << SEP << std::endl;
+  MSG << "Benchmark single precision Lime write" << std::endl;
+  MSG << SEP << std::endl;
+  for (auto &d: dir)
+  {
+    MSG << "-- Directory " << d << std::endl;
+    writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
+  }
+  MSG << SEP << std::endl;
+  MSG << "Benchmark single precision Lime read" << std::endl;
+  MSG << SEP << std::endl;
+  for (auto &d: dir)
+  {
+    MSG << "-- Directory " << d << std::endl;
+    readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
+  }

   Grid_finalize();

   return EXIT_SUCCESS;

View File

@ -123,10 +123,13 @@ case ${ac_SFW_FP16} in
     AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
 esac

-############### MKL
+############### Intel libraries
 AC_ARG_ENABLE([mkl],
     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
     [ac_MKL=${enable_mkl}], [ac_MKL=no])
+AC_ARG_ENABLE([ipp],
+    [AC_HELP_STRING([--enable-ipp=yes|no|prefix], [enable Intel IPP for fast CRC32C])],
+    [ac_IPP=${enable_ipp}], [ac_IPP=no])

 case ${ac_MKL} in
     no)
@ -139,6 +142,17 @@ case ${ac_MKL} in
         AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
 esac

+case ${ac_IPP} in
+    no)
+        ;;
+    yes)
+        AC_DEFINE([USE_IPP], [1], [Define to 1 if you use the Intel IPP]);;
+    *)
+        AM_CXXFLAGS="-I$ac_IPP/include $AM_CXXFLAGS"
+        AM_LDFLAGS="-L$ac_IPP/lib $AM_LDFLAGS"
+        AC_DEFINE([USE_IPP], [1], [Define to 1 if you use the Intel IPP]);;
+esac
+
 ############### HDF5
 AC_ARG_WITH([hdf5],
     [AS_HELP_STRING([--with-hdf5=prefix],
@ -170,7 +184,13 @@ AC_CHECK_FUNCS([gettimeofday])
 if test "${ac_MKL}x" != "nox"; then
     AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [],
-                   [AC_MSG_ERROR("MKL enabled but library not found")])
+                   [AC_MSG_ERROR("Intel MKL enabled but library not found")])
+fi
+
+if test "${ac_IPP}x" != "nox"; then
+    AC_SEARCH_LIBS([ippsCRC32C_8u], [ippdc],
+                   [LIBS="${LIBS} -lippdc -lippvm -lipps -lippcore"],
+                   [AC_MSG_ERROR("Intel IPP enabled but library not found")])
 fi

 AC_SEARCH_LIBS([__gmpf_init], [gmp],
@ -485,6 +505,7 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
 ############### Ouput
 cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
+GRID_CXX="$CXX"
 GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
 GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
 GRID_LIBS=$LIBS
@ -497,6 +518,7 @@ AM_LDFLAGS="-L${cwd}/Grid $AM_LDFLAGS"
 AC_SUBST([AM_CFLAGS])
 AC_SUBST([AM_CXXFLAGS])
 AC_SUBST([AM_LDFLAGS])
+AC_SUBST([GRID_CXX])
 AC_SUBST([GRID_CXXFLAGS])
 AC_SUBST([GRID_LDFLAGS])
 AC_SUBST([GRID_LIBS])
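With these changes one might configure, for example (hypothetical install prefix):

    ./configure --enable-mkl --enable-ipp=/opt/intel/ipp

--enable-ipp accepts yes|no|prefix like --enable-mkl, and the companion grid-config change below lets downstream builds query the compiler Grid was built with via grid-config --cxx.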

View File

@ -61,6 +61,10 @@ while test $# -gt 0; do
       echo @GRID_CXXFLAGS@
       ;;

+    --cxx)
+      echo @GRID_CXX@
+      ;;
+
     --ldflags)
       echo @GRID_LDFLAGS@
       ;;

View File

@ -0,0 +1,104 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef LatticeComplex ComplexField;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
int nd = latt_size.size();
int ndm1 = nd-1;
std::vector<int> simd_layout = GridDefaultSimd(nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
std::cout << " Full " << GridCmdVectorIntToString(latt_size) << " subgrid" <<std::endl;
std::cout << " Full " << GridCmdVectorIntToString(mpi_layout) << " sub communicator"<<std::endl;
std::cout << " Full " << GridCmdVectorIntToString(simd_layout)<< " simd layout " <<std::endl;
GridCartesian * GridN = new GridCartesian(latt_size,
simd_layout,
mpi_layout);
std::vector<int> latt_m = latt_size; latt_m[nd-1] = 1;
std::vector<int> mpi_m = mpi_layout; mpi_m [nd-1] = 1;
std::vector<int> simd_m = GridDefaultSimd(ndm1,vComplex::Nsimd()); simd_m.push_back(1);
std::cout << " Requesting " << GridCmdVectorIntToString(latt_m)<< " subgrid" <<std::endl;
std::cout << " Requesting " << GridCmdVectorIntToString(mpi_m) << " sub communicator"<<std::endl;
std::cout << " Requesting " << GridCmdVectorIntToString(simd_m)<< " simd layout " <<std::endl;
GridCartesian * Grid_m = new GridCartesian(latt_m,
simd_m,
mpi_m,
*GridN);
Complex C(1.0);
Complex tmp;
ComplexField Full(GridN); Full = C;
ComplexField Full_cpy(GridN);
ComplexField Split(Grid_m);Split= C;
std::cout << GridLogMessage<< " Full volume "<< norm2(Full) <<std::endl;
std::cout << GridLogMessage<< " Split volume "<< norm2(Split) <<std::endl;
tmp=C;
GridN->GlobalSum(tmp);
std::cout << GridLogMessage<< " Full nodes "<< tmp <<std::endl;
tmp=C;
Grid_m->GlobalSum(tmp);
std::cout << GridLogMessage<< " Split nodes "<< tmp <<std::endl;
GridN->Barrier();
auto local_latt = GridN->LocalDimensions();
Full_cpy = zero;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG RNG(GridN); RNG.SeedFixedIntegers(seeds);
random(RNG,Full);
for(int t=0;t<local_latt[nd-1];t++){
ExtractSliceLocal(Split,Full,0,t,Tp);
InsertSliceLocal (Split,Full_cpy,0,t,Tp);
}
Full_cpy = Full_cpy - Full;
std::cout << " NormFull " << norm2(Full)<<std::endl;
std::cout << " NormDiff " << norm2(Full_cpy)<<std::endl;
Grid_finalize();
}

View File

@ -72,6 +72,7 @@ int main(int argc, char *argv[])
     // set fermion boundary conditions to be periodic space, antiperiodic time.
     std::string boundary = "1 1 1 -1";
+    std::string twist = "0. 0. 0. 0.";

     //stochastic photon field
     MGauge::StochEm::Par photonPar;
@ -90,6 +91,7 @@ int main(int argc, char *argv[])
         actionPar.M5    = 1.8;
         actionPar.mass  = mass[i];
         actionPar.boundary = boundary;
+        actionPar.twist = "0. 0. 0. 0.";
         application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar);

View File

@ -126,6 +126,7 @@ inline void makeWilsonAction(Application &application, std::string actionName,
         actionPar.gauge = gaugeField;
         actionPar.mass  = mass;
         actionPar.boundary = boundary;
+        actionPar.twist = "0. 0. 0. 0.";
         application.createModule<MAction::Wilson>(actionName, actionPar);
     }
 }
@ -154,6 +155,7 @@ inline void makeDWFAction(Application &application, std::string actionName,
         actionPar.M5       = M5;
         actionPar.mass     = mass;
         actionPar.boundary = boundary;
+        actionPar.twist = "0. 0. 0. 0.";
         application.createModule<MAction::DWF>(actionName, actionPar);
     }
 }

View File

@ -66,6 +66,7 @@ int main(int argc, char *argv[])
     // set fermion boundary conditions to be periodic space, antiperiodic time.
     std::string boundary = "1 1 1 -1";
+    std::string twist = "0. 0. 0. 0.";

     // sink
     MSink::Point::Par sinkPar;
@ -80,6 +81,7 @@ int main(int argc, char *argv[])
         actionPar.M5    = 1.8;
         actionPar.mass  = mass[i];
         actionPar.boundary = boundary;
+        actionPar.twist = twist;
         application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar);

         // solvers

View File

@ -72,6 +72,7 @@ int main(int argc, char *argv[])
     // set fermion boundary conditions to be periodic space, antiperiodic time.
     std::string boundary = "1 1 1 -1";
+    std::string twist = "0. 0. 0. 0.";

     for (unsigned int i = 0; i < flavour.size(); ++i)
     {
@ -82,6 +83,7 @@ int main(int argc, char *argv[])
         actionPar.M5    = 1.8;
         actionPar.mass  = mass[i];
         actionPar.boundary = boundary;
+        actionPar.twist = twist;
         application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar);

         // solvers

View File

@ -38,6 +38,7 @@ int main (int argc, char ** argv)
   typedef typename DomainWallFermionR::ComplexField ComplexField;
   typename DomainWallFermionR::ImplParams params;

+  double stp=1.0e-5;
   const int Ls=4;

   Grid_init(&argc,&argv);
@ -197,7 +198,7 @@ int main (int argc, char ** argv)
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
-  ConjugateGradient<FermionField> CG((1.0e-2),10000);
+  ConjugateGradient<FermionField> CG((stp),10000);

   s_res = zero;
   CG(HermOp,s_src,s_res);
@ -227,5 +228,11 @@ int main (int argc, char ** argv)
     std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)/norm2(src[n])<<std::endl;
   }

+  for(int s=0;s<nrhs;s++) result[s]=zero;
+  int blockDim = 0;//not used for BlockCGVec
+  BlockConjugateGradient<FermionField>    BCGV  (BlockCGVec,blockDim,stp,10000);
+  BCGV.PrintInterval=10;
+  BCGV(HermOpCk,src,result);
+
   Grid_finalize();
 }

View File

@ -0,0 +1,220 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename MobiusFermionR::FermionField FermionField;
typedef typename MobiusFermionR::ComplexField ComplexField;
typename MobiusFermionR::ImplParams params;
const int Ls=12;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
std::vector<int> split_coor (mpi_layout.size(),1);
std::vector<int> split_dim (mpi_layout.size(),1);
std::vector<ComplexD> boundary_phases(Nd,1.);
boundary_phases[Nd-1]=-1.;
params.boundary_phases = boundary_phases;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
for(int i=0;i<argc;i++){
if(std::string(argv[i]) == "--split"){
for(int k=0;k<mpi_layout.size();k++){
std::stringstream ss;
ss << argv[i+1+k];
ss >> mpi_split[k];
}
break;
}
}
double stp = 1.e-8;
int nrhs = 1;
int me;
for(int i=0;i<mpi_layout.size();i++){
// split_dim[i] = (mpi_layout[i]/mpi_split[i]);
nrhs *= (mpi_layout[i]/mpi_split[i]);
// split_coor[i] = FGrid._processor_coor[i]/mpi_split[i];
}
std::cout << GridLogMessage << "Creating split grids " <<std::endl;
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid,me);
std::cout << GridLogMessage <<"Creating split ferm grids " <<std::endl;
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
std::cout << GridLogMessage <<"Creating split rb grids " <<std::endl;
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
std::cout << GridLogMessage <<"Creating split ferm rb grids " <<std::endl;
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid);
std::cout << GridLogMessage << "Made the grids"<<std::endl;
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
std::cout << GridLogMessage << "Made the Fermion Fields"<<std::endl;
for(int s=0;s<nrhs;s++) result[s]=zero;
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
for(int s=0;s<nrhs;s++) {
random(pRNG5,src[s]);
std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
}
std::cout << GridLogMessage << "Intialised the Fermion Fields"<<std::endl;
LatticeGaugeField Umu(UGrid);
if(0) {
FieldMetaData header;
std::string file("./lat.in");
NerscIO::readConfiguration(Umu,header,file);
std::cout << GridLogMessage << " "<<file<<" successfully read" <<std::endl;
} else {
GridParallelRNG pRNG(UGrid );
std::cout << GridLogMessage << "Intialising 4D RNG "<<std::endl;
pRNG.SeedFixedIntegers(seeds);
std::cout << GridLogMessage << "Intialised 4D RNG "<<std::endl;
SU3::HotConfiguration(pRNG,Umu);
std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<<std::endl;
std::cout << " Site zero "<< Umu._odata[0] <<std::endl;
}
/////////////////
// MPI only sends
/////////////////
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_tmp(SFGrid);
FermionField s_res(SFGrid);
std::cout << GridLogMessage << "Made the split grid fields"<<std::endl;
///////////////////////////////////////////////////////////////
// split the source out using MPI instead of I/O
///////////////////////////////////////////////////////////////
Grid_split (Umu,s_Umu);
Grid_split (src,s_src);
std::cout << GridLogMessage << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
std::cout << GridLogMessage << " Building the solvers"<<std::endl;
// RealD mass=0.00107;
RealD mass=0.1;
RealD M5=1.8;
RealD mobius_factor=32./12.;
RealD mobius_b=0.5*(mobius_factor+1.);
RealD mobius_c=0.5*(mobius_factor-1.);
MobiusFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5,mobius_b,mobius_c,params);
MobiusFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5,mobius_b,mobius_c,params);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<MobiusFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<MobiusFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((stp),100000);
s_res = zero;
CG(HermOp,s_src,s_res);
std::cout << GridLogMessage << " split residual norm "<<norm2(s_res)<<std::endl;
/////////////////////////////////////////////////////////////
// Report how long they all took
/////////////////////////////////////////////////////////////
std::vector<uint32_t> iterations(nrhs,0);
iterations[me] = CG.IterationsToComplete;
for(int n=0;n<nrhs;n++){
UGrid->GlobalSum(iterations[n]);
std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl;
}
/////////////////////////////////////////////////////////////
// Gather and residual check on the results
/////////////////////////////////////////////////////////////
std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl;
Grid_unsplit(result,s_res);
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
std::cout << GridLogMessage<< " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
std::cout << GridLogMessage<<" resid["<<n<<"] "<< std::sqrt(norm2(tmp)/norm2(src[n]))<<std::endl;
}
for(int s=0;s<nrhs;s++){
result[s]=zero;
}
/////////////////////////////////////////////////////////////
// Try block CG
/////////////////////////////////////////////////////////////
int blockDim = 0;//not used for BlockCGVec
BlockConjugateGradient<FermionField> BCGV (BlockCGrQVec,blockDim,stp,100000);
{
BCGV(HermOpCk,src,result);
}
Grid_finalize();
}

View File

@ -0,0 +1,144 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params;
const int Ls=16;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<ComplexD> boundary_phases(Nd,1.);
boundary_phases[Nd-1]=-1.;
params.boundary_phases = boundary_phases;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
double stp = 1.e-8;
int nrhs = 2;
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
std::cout << GridLogMessage << "Made the Fermion Fields"<<std::endl;
for(int s=0;s<nrhs;s++) result[s]=zero;
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
for(int s=0;s<nrhs;s++) {
random(pRNG5,src[s]);
std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
}
std::cout << GridLogMessage << "Intialised the Fermion Fields"<<std::endl;
LatticeGaugeField Umu(UGrid);
int conf = 0;
if(conf==0) {
FieldMetaData header;
std::string file("./lat.in");
NerscIO::readConfiguration(Umu,header,file);
std::cout << GridLogMessage << " Config "<<file<<" successfully read" <<std::endl;
} else if (conf==1){
GridParallelRNG pRNG(UGrid );
pRNG.SeedFixedIntegers(seeds);
SU3::HotConfiguration(pRNG,Umu);
std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<<std::endl;
} else {
SU3::ColdConfiguration(Umu);
std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<<std::endl;
}
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
std::cout << GridLogMessage << " Building the solvers"<<std::endl;
RealD mass=0.01;
RealD M5=1.8;
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5,params);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
ConjugateGradient<FermionField> CG((stp),100000);
for(int rhs=0;rhs<1;rhs++){
result[rhs] = zero;
CG(HermOp,src[rhs],result[rhs]);
}
for(int rhs=0;rhs<1;rhs++){
std::cout << " Result["<<rhs<<"] norm = "<<norm2(result[rhs])<<std::endl;
}
/////////////////////////////////////////////////////////////
// Try block CG
/////////////////////////////////////////////////////////////
int blockDim = 0;//not used for BlockCGVec
for(int s=0;s<nrhs;s++){
result[s]=zero;
}
BlockConjugateGradient<FermionField> BCGV (BlockCGrQVec,blockDim,stp,100000);
{
BCGV(HermOp,src,result);
}
for(int rhs=0;rhs<nrhs;rhs++){
std::cout << " Result["<<rhs<<"] norm = "<<norm2(result[rhs])<<std::endl;
}
Grid_finalize();
}

View File

@ -0,0 +1,148 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params;
const int Ls=16;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<ComplexD> boundary_phases(Nd,1.);
boundary_phases[Nd-1]=-1.;
params.boundary_phases = boundary_phases;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
double stp = 1.e-8;
int nrhs = 2;
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
std::vector<FermionField> src4(nrhs,UGrid);
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
std::cout << GridLogMessage << "Made the Fermion Fields"<<std::endl;
for(int s=0;s<nrhs;s++) result[s]=zero;
GridParallelRNG pRNG4(UGrid); pRNG4.SeedFixedIntegers(seeds);
for(int s=0;s<nrhs;s++) {
random(pRNG4,src4[s]);
std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
}
std::cout << GridLogMessage << "Intialised the Fermion Fields"<<std::endl;
LatticeGaugeField Umu(UGrid);
int conf = 0;
if(conf==0) {
FieldMetaData header;
std::string file("./lat.in");
NerscIO::readConfiguration(Umu,header,file);
std::cout << GridLogMessage << " Config "<<file<<" successfully read" <<std::endl;
} else if (conf==1){
GridParallelRNG pRNG(UGrid );
pRNG.SeedFixedIntegers(seeds);
SU3::HotConfiguration(pRNG,Umu);
std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<<std::endl;
} else {
SU3::ColdConfiguration(Umu);
std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<<std::endl;
}
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
std::cout << GridLogMessage << " Building the solvers"<<std::endl;
RealD mass=0.01;
RealD M5=1.8;
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5,params);
for(int s=0;s<nrhs;s++) {
Ddwf.ImportPhysicalFermionSource(src4[s],src[s]);
}
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
ConjugateGradient<FermionField> CG((stp),100000);
for(int rhs=0;rhs<1;rhs++){
result[rhs] = zero;
// CG(HermOp,src[rhs],result[rhs]);
}
for(int rhs=0;rhs<1;rhs++){
std::cout << " Result["<<rhs<<"] norm = "<<norm2(result[rhs])<<std::endl;
}
/////////////////////////////////////////////////////////////
// Try block CG
/////////////////////////////////////////////////////////////
int blockDim = 0; // not used for BlockCGrQVec
for(int s=0;s<nrhs;s++){
result[s]=zero;
}
BlockConjugateGradient<FermionField> BCGV (BlockCGrQVec,blockDim,stp,100000);
{
BCGV(HermOp,src,result);
}
for(int rhs=0;rhs<nrhs;rhs++){
std::cout << " Result["<<rhs<<"] norm = "<<norm2(result[rhs])<<std::endl;
}
Grid_finalize();
}
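A natural cross-check, left implicit in the test above, is the true residual of each block-solved right hand side. A minimal sketch that could follow the BCGV call (not part of the test; the scratch field r and the relative-residual print are illustrative):

for(int s=0;s<nrhs;s++){
  FermionField r(FGrid);        // scratch field for the true residual
  HermOp.HermOp(result[s],r);   // r = Mdag M x
  r = r - src[s];               // r = Mdag M x - b
  RealD rel = std::sqrt(norm2(r)/norm2(src[s]));
  std::cout << GridLogMessage << " RHS "<<s<<" true residual "<<rel<<std::endl;
}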

View File

@@ -0,0 +1,147 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params;
const int Ls=16;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<ComplexD> boundary_phases(Nd,1.);
boundary_phases[Nd-1]=-1.;
params.boundary_phases = boundary_phases;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
double stp = 1.e-8;
int nrhs = 4;
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
std::cout << GridLogMessage << "Made the Fermion Fields"<<std::endl;
for(int s=0;s<nrhs;s++) result[s]=zero;
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
for(int s=0;s<nrhs;s++) {
random(pRNG5,src[s]);
std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
}
std::cout << GridLogMessage << "Intialised the Fermion Fields"<<std::endl;
LatticeGaugeField Umu(UGrid);
int conf = 2;
if(conf==0) {
FieldMetaData header;
std::string file("./lat.in");
NerscIO::readConfiguration(Umu,header,file);
std::cout << GridLogMessage << " Config "<<file<<" successfully read" <<std::endl;
} else if (conf==1){
GridParallelRNG pRNG(UGrid );
pRNG.SeedFixedIntegers(seeds);
SU3::HotConfiguration(pRNG,Umu);
std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<<std::endl;
} else {
SU3::ColdConfiguration(Umu);
std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<<std::endl;
}
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
std::cout << GridLogMessage << " Building the solvers"<<std::endl;
RealD mass=0.01;
RealD M5=1.8;
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5,params);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
ConjugateGradient<FermionField> CG(stp,100000);
for(int rhs=0;rhs<1;rhs++){
result[rhs] = zero;
CG(HermOp,src[rhs],result[rhs]);
}
for(int rhs=0;rhs<1;rhs++){
std::cout << " Result["<<rhs<<"] norm = "<<norm2(result[rhs])<<std::endl;
}
/////////////////////////////////////////////////////////////
// Try block CG
/////////////////////////////////////////////////////////////
int blockDim = 0; // not used for BlockCGrQVec
for(int s=0;s<nrhs;s++){
result[s]=zero;
}
{
BlockConjugateGradient<FermionField> BCGV (BlockCGrQVec,blockDim,stp,100000);
SchurRedBlackDiagTwoSolve<FermionField> SchurSolver(BCGV);
SchurSolver(Ddwf,src,result);
}
for(int rhs=0;rhs<nrhs;rhs++){
std::cout << " Result["<<rhs<<"] norm = "<<norm2(result[rhs])<<std::endl;
}
Grid_finalize();
}
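Unlike the previous test, which runs BCGV directly on the unpreconditioned MdagM operator, this variant hands it to SchurRedBlackDiagTwoSolve, which performs the even-odd split, the preconditioned odd-site solve and the even-site reconstruction internally. For comparison, a sketch of the same wrapper driving an ordinary single-RHS CG (CGrb and SchurCG are illustrative names, not from the test):

ConjugateGradient<FermionField> CGrb(stp,100000);
SchurRedBlackDiagTwoSolve<FermionField> SchurCG(CGrb);
for(int s=0;s<nrhs;s++){
  SchurCG(Ddwf,src[s],result[s]); // one red-black solve per right hand side
}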

View File

@@ -67,7 +67,22 @@ int main (int argc, char ** argv)
   GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds);
   GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds);
-  FermionField src(FGrid); random(pRNG5,src);
+  FermionField src(FGrid);
+  FermionField tt(FGrid);
+#if 1
+  random(pRNG5,src);
+#else
+  src=zero;
+  ComplexField coor(FGrid);
+  LatticeCoordinate(coor,0);
+  for(int ss=0;ss<FGrid->oSites();ss++){
+    src._odata[ss]()()(0)=coor._odata[ss]()()();
+  }
+  LatticeCoordinate(coor,1);
+  for(int ss=0;ss<FGrid->oSites();ss++){
+    src._odata[ss]()()(0)+=coor._odata[ss]()()();
+  }
+#endif
   FermionField src_o(FrbGrid);   pickCheckerboard(Odd,src_o,src);
   FermionField result_o(FrbGrid); result_o=zero;
   RealD nrm = norm2(src);
@@ -89,7 +104,8 @@ int main (int argc, char ** argv)
   ConjugateGradient<FermionField> CG(1.0e-8,10000);
   int blockDim = 0;
   BlockConjugateGradient<FermionField>    BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000);
-  BlockConjugateGradient<FermionField>    BCG  (BlockCG,blockDim,1.0e-8,10000);
+  BlockConjugateGradient<FermionField>    BCG  (BlockCGrQ,blockDim,1.0e-8,10000);
+  BlockConjugateGradient<FermionField>    BCGv (BlockCGrQVec,blockDim,1.0e-8,10000);
   BlockConjugateGradient<FermionField>    mCG  (CGmultiRHS,blockDim,1.0e-8,10000);
   std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
@@ -158,7 +174,7 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
   std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
-  std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl;
+  std::cout << GridLogMessage << " Calling Block CGrQ for "<<Ls <<" right hand sides" <<std::endl;
   std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
   Ds.ZeroCounters();
   result_o=zero;
@@ -176,6 +192,49 @@ int main (int argc, char ** argv)
   Ds.Report();
   std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
+  std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
+  std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl;
+  std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
+  Ds.ZeroCounters();
+  result_o=zero;
+  {
+    double t1=usecond();
+    BCG(HermOp,src_o,result_o);
+    double t2=usecond();
+    double ncall=BCG.IterationsToComplete*Ls;
+    double flops = deodoe_flops * ncall;
+    std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
+    std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
+    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
+    HermOp.Report();
+  }
+  Ds.Report();
+  std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
+  std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
+  std::cout << GridLogMessage << " Calling BCGvec "<<std::endl;
+  std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
+  std::vector<FermionField> src_v   (Ls,UrbGrid);
+  std::vector<FermionField> result_v(Ls,UrbGrid);
+  for(int s=0;s<Ls;s++) result_v[s] = zero;
+  for(int s=0;s<Ls;s++) {
+    FermionField src4(UGrid);
+    ExtractSlice(src4,src,s,0);
+    pickCheckerboard(Odd,src_v[s],src4);
+  }
+  {
+    double t1=usecond();
+    BCGv(HermOp4d,src_v,result_v);
+    double t2=usecond();
+    double ncall=BCGv.IterationsToComplete*Ls;
+    double flops = deodoe_flops * ncall;
+    std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
+    std::cout<<GridLogMessage << "flops = "<< flops<<std::endl;
+    std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
+    // HermOp4d.Report();
+  }
   Grid_finalize();
 }
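The new BCGvec section above splits the 5d source into Ls independent 4d odd-checkerboard problems with ExtractSlice and pickCheckerboard. The reverse mapping, which the diff never performs, would look roughly like this sketch (sol5 and sol4 are hypothetical names, not from the test):

FermionField sol5(FGrid); sol5 = zero;
for(int s=0;s<Ls;s++){
  FermionField sol4(UGrid); sol4 = zero;
  setCheckerboard(sol4,result_v[s]); // scatter the odd-site solution back; even sites stay zero
  InsertSlice(sol4,sol5,s,0);        // place the 4d slice at coordinate s in the 5th dimension
}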