
Merge branch 'develop' of github.com:paboyle/Grid into develop

This commit is contained in:
Antonin Portelli 2021-10-29 13:01:34 +01:00
commit a65a497bae
64 changed files with 1580 additions and 302 deletions

View File

@@ -358,7 +358,7 @@ public:
autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead);
- auto& geom_v = geom;
+ int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
@@ -380,7 +380,7 @@ public:
int ptype;
StencilEntry *SE;
- for(int point=0;point<geom_v.npoint;point++){
+ for(int point=0;point<npoint;point++){
SE=Stencil_v.GetEntry(ptype,point,ss);
@@ -424,7 +424,7 @@ public:
autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead);
- auto& geom_v = geom;
+ int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
@@ -454,7 +454,7 @@ public:
int ptype;
StencilEntry *SE;
- for(int p=0;p<geom_v.npoint;p++){
+ for(int p=0;p<npoint;p++){
int point = points_p[p];
SE=Stencil_v.GetEntry(ptype,point,ss);

View File

@@ -52,6 +52,7 @@ public:
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
virtual void HermOp(const Field &in, Field &out)=0;
+ virtual ~LinearOperatorBase(){};
};
@@ -507,7 +508,7 @@ class SchurStaggeredOperator : public SchurOperatorBase<Field> {
virtual void MpcDag (const Field &in, Field &out){
Mpc(in,out);
}
- virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
+ virtual void MpcDagMpc(const Field &in, Field &out) {
assert(0);// Never need with staggered
}
};
@@ -585,6 +586,7 @@ class HermOpOperatorFunction : public OperatorFunction<Field> {
template<typename Field>
class PlainHermOp : public LinearFunction<Field> {
public:
+ using LinearFunction<Field>::operator();
LinearOperatorBase<Field> &_Linop;
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
@@ -598,6 +600,7 @@ public:
template<typename Field>
class FunctionHermOp : public LinearFunction<Field> {
public:
+ using LinearFunction<Field>::operator();
OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;

View File

@@ -30,13 +30,19 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
+ template<class Field> using Preconditioner = LinearFunction<Field> ;
+ /*
template<class Field> class Preconditioner : public LinearFunction<Field> {
+ using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field & psi)=0;
};
+ */
template<class Field> class TrivialPrecon : public Preconditioner<Field> {
public:
- void operator()(const Field &src, Field & psi){
+ using Preconditioner<Field>::operator();
+ virtual void operator()(const Field &src, Field & psi){
psi = src;
}
TrivialPrecon(void){};

View File

@@ -48,6 +48,7 @@ public:
virtual void Mdiag (const Field &in, Field &out)=0;
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
+ virtual ~SparseMatrixBase() {};
};
/////////////////////////////////////////////////////////////////////////////////////////////
@@ -72,7 +73,7 @@ public:
virtual void MeooeDag (const Field &in, Field &out)=0;
virtual void MooeeDag (const Field &in, Field &out)=0;
virtual void MooeeInvDag (const Field &in, Field &out)=0;
+ virtual ~CheckerBoardedSparseMatrixBase() {};
};
NAMESPACE_END(Grid);

View File

@@ -36,7 +36,8 @@ NAMESPACE_BEGIN(Grid);
template<class FieldD, class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionBiCGSTAB : public LinearFunction<FieldD>
{
public:
+ using LinearFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; // Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;

View File

@@ -35,7 +35,8 @@ NAMESPACE_BEGIN(Grid);
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
public:
+ using LinearFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;

View File

@@ -33,16 +33,19 @@ namespace Grid {
template<class Field>
class ZeroGuesser: public LinearFunction<Field> {
public:
+ using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field &guess) { guess = Zero(); };
};
template<class Field>
class DoNothingGuesser: public LinearFunction<Field> {
public:
+ using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field &guess) { };
};
template<class Field>
class SourceGuesser: public LinearFunction<Field> {
public:
+ using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field &guess) { guess = src; };
};
@@ -57,6 +60,7 @@ private:
const unsigned int N;
public:
+ using LinearFunction<Field>::operator();
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval)
: DeflatedGuesser(_evec, _eval, _evec.size())
@@ -87,6 +91,7 @@ private:
const std::vector<RealD> &eval_coarse;
public:
+ using LinearFunction<FineField>::operator();
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
const std::vector<CoarseField> &_evec_coarse,
const std::vector<RealD> &_eval_coarse)

View File

@@ -67,6 +67,7 @@ public:
template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
+ using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
@@ -97,6 +98,7 @@ public:
template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
+ using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field

View File

@@ -43,7 +43,7 @@ NAMESPACE_BEGIN(Grid);
template<class Field>
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
public:
+ using LinearFunction<Field>::operator();
RealD Tolerance;
Integer MaxIterations;
int verbose;

View File

@@ -43,7 +43,7 @@ NAMESPACE_BEGIN(Grid);
template<class Field>
class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> {
public:
+ using LinearFunction<Field>::operator();
RealD Tolerance;
Integer MaxIterations;
int verbose;
@@ -119,7 +119,8 @@ public:
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
RealD cp;
- ComplexD a, b, zAz;
+ ComplexD a, b;
+ // ComplexD zAz;
RealD zAAz;
ComplexD rq;
@@ -146,7 +147,7 @@ public:
//////////////////////////////////
MatTimer.Start();
Linop.Op(psi,Az);
- zAz = innerProduct(Az,psi);
+ // zAz = innerProduct(Az,psi);
zAAz= norm2(Az);
MatTimer.Stop();
@@ -170,7 +171,7 @@ public:
LinalgTimer.Start();
- zAz = innerProduct(Az,psi);
+ // zAz = innerProduct(Az,psi);
zAAz= norm2(Az);
//p[0],q[0],qq[0]
@@ -212,7 +213,7 @@ public:
MatTimer.Start();
Linop.Op(z,Az);
MatTimer.Stop();
- zAz = innerProduct(Az,psi);
+ // zAz = innerProduct(Az,psi);
zAAz= norm2(Az);
LinalgTimer.Start();

View File

@@ -159,7 +159,6 @@ void MemoryManager::Init(void)
char * str;
int Nc;
- int NcS;
str= getenv("GRID_ALLOC_NCACHE_LARGE");
if ( str ) {

View File

@@ -389,7 +389,6 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL);
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
- acceleratorCopySynchronise(); // MPI prob slower
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
@@ -405,6 +404,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
if (nreq==0) return;
std::vector<MPI_Status> status(nreq);
+ acceleratorCopySynchronise();
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
list.resize(0);

View File

@@ -42,7 +42,6 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl;
std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl;
std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl;
- std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << warpSize << std::endl;
std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl;
if (warpSize != WARP_SIZE) {
@@ -52,6 +51,10 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
// let the number of threads in a block be a multiple of 2, starting from warpSize
threads = warpSize;
+ if ( threads*sizeofsobj > sharedMemPerBlock ) {
+ std::cout << GridLogError << "The object is too large for the shared memory." << std::endl;
+ exit(EXIT_FAILURE);
+ }
while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2;
// keep all the streaming multiprocessors busy
blocks = nextPow2(multiProcessorCount);

View File

@@ -85,6 +85,76 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
});
}
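// Device-side analogues of pickCheckerboard/setCheckerboard: the two functions
// below copy the sites of one parity (cb) between a full lattice and a
// checkerboarded half lattice inside a single accelerator_for loop, so the
// data never takes a host-side round trip.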
template<class vobj> inline void acceleratorPickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full, int checker_dim_half=0)
{
half.Checkerboard() = cb;
autoView(half_v, half, AcceleratorWrite);
autoView(full_v, full, AcceleratorRead);
Coordinate rdim_full = full.Grid()->_rdimensions;
Coordinate rdim_half = half.Grid()->_rdimensions;
unsigned long ndim_half = half.Grid()->_ndimension;
Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask;
Coordinate ostride_half = half.Grid()->_ostride;
accelerator_for(ss, full.Grid()->oSites(),full.Grid()->Nsimd(),{
Coordinate coor;
int cbos;
int linear=0;
Lexicographic::CoorFromIndex(coor,ss,rdim_full);
assert(coor.size()==ndim_half);
for(int d=0;d<ndim_half;d++){
if(checker_dim_mask_half[d]) linear += coor[d];
}
cbos = (linear&0x1);
if (cbos==cb) {
int ssh=0;
for(int d=0;d<ndim_half;d++) {
if (d == checker_dim_half) ssh += ostride_half[d] * ((coor[d] / 2) % rdim_half[d]);
else ssh += ostride_half[d] * (coor[d] % rdim_half[d]);
}
coalescedWrite(half_v[ssh],full_v(ss));
}
});
}
template<class vobj> inline void acceleratorSetCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half, int checker_dim_half=0)
{
int cb = half.Checkerboard();
autoView(half_v , half, AcceleratorRead);
autoView(full_v , full, AcceleratorWrite);
Coordinate rdim_full = full.Grid()->_rdimensions;
Coordinate rdim_half = half.Grid()->_rdimensions;
unsigned long ndim_half = half.Grid()->_ndimension;
Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask;
Coordinate ostride_half = half.Grid()->_ostride;
accelerator_for(ss,full.Grid()->oSites(),full.Grid()->Nsimd(),{
Coordinate coor;
int cbos;
int linear=0;
Lexicographic::CoorFromIndex(coor,ss,rdim_full);
assert(coor.size()==ndim_half);
for(int d=0;d<ndim_half;d++){
if(checker_dim_mask_half[d]) linear += coor[d];
}
cbos = (linear&0x1);
if (cbos==cb) {
int ssh=0;
for(int d=0;d<ndim_half;d++){
if (d == checker_dim_half) ssh += ostride_half[d] * ((coor[d] / 2) % rdim_half[d]);
else ssh += ostride_half[d] * (coor[d] % rdim_half[d]);
}
coalescedWrite(full_v[ss],half_v(ssh));
}
});
}
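// Hypothetical usage sketch (not part of this commit; grids and fields are
// assumed to be set up elsewhere, Even/Odd are the usual Grid parities):
//   LatticeFermion full(&FineGrid), half(&RBGrid);
//   acceleratorPickCheckerboard(Even, half, full); // device-side pickCheckerboard
//   acceleratorSetCheckerboard(full, half);        // device-side setCheckerboard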
////////////////////////////////////////////////////////////////////////////////////////////
// Flexible Type Conversion for internal promotion to double as well as graceful
// treatment of scalar-compatible types

View File

@@ -828,6 +828,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
#if (!defined(GRID_HIP))
int tshift = (mu == Nd-1) ? 1 : 0;
+ unsigned int LLt = GridDefaultLatt()[Tp];
////////////////////////////////////////////////
// GENERAL CAYLEY CASE
////////////////////////////////////////////////
@@ -880,7 +881,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
}
std::vector<RealD> G_s(Ls,1.0);
- RealD sign = 1; // sign flip for vector/tadpole
+ RealD sign = 1.0; // sign flip for vector/tadpole
if ( curr_type == Current::Axial ) {
for(int s=0;s<Ls/2;s++){
G_s[s] = -1.0;
@@ -890,7 +891,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
auto b=this->_b;
auto c=this->_c;
if ( b == 1 && c == 0 ) {
- sign = -1;
+ sign = -1.0;
}
else {
std::cerr << "Error: Tadpole implementation currently unavailable for non-Shamir actions." << std::endl;
@@ -934,7 +935,13 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = Cshift(tmp,mu,-1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
tmp = -G_s[s]*( Utmp + gmu*Utmp );
- tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time
+ // Mask the time
+ if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
+ unsigned int t0 = 0;
+ tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
+ } else {
+ tmp = where((lcoor>=tmin+tshift),tmp,zz);
+ }
L_Q += where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
InsertSlice(L_Q, q_out, s , 0);

View File

@ -77,23 +77,23 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define REGISTER #define REGISTER
#ifdef GRID_SIMT #ifdef GRID_SIMT
#define LOAD_CHIMU(ptype) \ #define LOAD_CHIMU(Ptype) \
{const SiteSpinor & ref (in[offset]); \ {const SiteSpinor & ref (in[offset]); \
Chimu_00=coalescedReadPermute<ptype>(ref()(0)(0),perm,lane); \ Chimu_00=coalescedReadPermute<Ptype>(ref()(0)(0),perm,lane); \
Chimu_01=coalescedReadPermute<ptype>(ref()(0)(1),perm,lane); \ Chimu_01=coalescedReadPermute<Ptype>(ref()(0)(1),perm,lane); \
Chimu_02=coalescedReadPermute<ptype>(ref()(0)(2),perm,lane); \ Chimu_02=coalescedReadPermute<Ptype>(ref()(0)(2),perm,lane); \
Chimu_10=coalescedReadPermute<ptype>(ref()(1)(0),perm,lane); \ Chimu_10=coalescedReadPermute<Ptype>(ref()(1)(0),perm,lane); \
Chimu_11=coalescedReadPermute<ptype>(ref()(1)(1),perm,lane); \ Chimu_11=coalescedReadPermute<Ptype>(ref()(1)(1),perm,lane); \
Chimu_12=coalescedReadPermute<ptype>(ref()(1)(2),perm,lane); \ Chimu_12=coalescedReadPermute<Ptype>(ref()(1)(2),perm,lane); \
Chimu_20=coalescedReadPermute<ptype>(ref()(2)(0),perm,lane); \ Chimu_20=coalescedReadPermute<Ptype>(ref()(2)(0),perm,lane); \
Chimu_21=coalescedReadPermute<ptype>(ref()(2)(1),perm,lane); \ Chimu_21=coalescedReadPermute<Ptype>(ref()(2)(1),perm,lane); \
Chimu_22=coalescedReadPermute<ptype>(ref()(2)(2),perm,lane); \ Chimu_22=coalescedReadPermute<Ptype>(ref()(2)(2),perm,lane); \
Chimu_30=coalescedReadPermute<ptype>(ref()(3)(0),perm,lane); \ Chimu_30=coalescedReadPermute<Ptype>(ref()(3)(0),perm,lane); \
Chimu_31=coalescedReadPermute<ptype>(ref()(3)(1),perm,lane); \ Chimu_31=coalescedReadPermute<Ptype>(ref()(3)(1),perm,lane); \
Chimu_32=coalescedReadPermute<ptype>(ref()(3)(2),perm,lane); } Chimu_32=coalescedReadPermute<Ptype>(ref()(3)(2),perm,lane); }
#define PERMUTE_DIR(dir) ; #define PERMUTE_DIR(dir) ;
#else #else
#define LOAD_CHIMU(ptype) \ #define LOAD_CHIMU(Ptype) \
{const SiteSpinor & ref (in[offset]); \ {const SiteSpinor & ref (in[offset]); \
Chimu_00=ref()(0)(0);\ Chimu_00=ref()(0)(0);\
Chimu_01=ref()(0)(1);\ Chimu_01=ref()(0)(1);\
@ -109,12 +109,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Chimu_32=ref()(3)(2);} Chimu_32=ref()(3)(2);}
#define PERMUTE_DIR(dir) \ #define PERMUTE_DIR(dir) \
permute##dir(Chi_00,Chi_00); \ permute##dir(Chi_00,Chi_00); \
permute##dir(Chi_01,Chi_01);\ permute##dir(Chi_01,Chi_01); \
permute##dir(Chi_02,Chi_02);\ permute##dir(Chi_02,Chi_02); \
permute##dir(Chi_10,Chi_10); \ permute##dir(Chi_10,Chi_10); \
permute##dir(Chi_11,Chi_11);\ permute##dir(Chi_11,Chi_11); \
permute##dir(Chi_12,Chi_12); permute##dir(Chi_12,Chi_12);
#endif #endif
@ -371,88 +371,91 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
result_32-= UChi_12; result_32-= UChi_12;
#define HAND_STENCIL_LEGB(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEGB(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ {int ptype; \
offset = SE->_offset; \ SE=st.GetEntry(ptype,DIR,ss); \
local = SE->_is_local; \ auto offset = SE->_offset; \
perm = SE->_permute; \ auto local = SE->_is_local; \
if ( local ) { \ auto perm = SE->_permute; \
LOAD_CHIMU(PERM); \ if ( local ) { \
PROJ; \ LOAD_CHIMU(PERM); \
if ( perm) { \ PROJ; \
PERMUTE_DIR(PERM); \ if ( perm) { \
} \ PERMUTE_DIR(PERM); \
} else { \ } \
LOAD_CHI; \ } else { \
} \ LOAD_CHI; \
acceleratorSynchronise(); \ } \
MULT_2SPIN(DIR); \ acceleratorSynchronise(); \
RECON; MULT_2SPIN(DIR); \
RECON; }
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
SE=&st_p[DIR+8*ss]; \ { SE=&st_p[DIR+8*ss]; \
ptype=st_perm[DIR]; \ auto ptype=st_perm[DIR]; \
offset = SE->_offset; \ auto offset = SE->_offset; \
local = SE->_is_local; \ auto local = SE->_is_local; \
perm = SE->_permute; \ auto perm = SE->_permute; \
if ( local ) { \ if ( local ) { \
LOAD_CHIMU(PERM); \ LOAD_CHIMU(PERM); \
PROJ; \ PROJ; \
if ( perm) { \ if ( perm) { \
PERMUTE_DIR(PERM); \ PERMUTE_DIR(PERM); \
} \ } \
} else { \ } else { \
LOAD_CHI; \ LOAD_CHI; \
} \ } \
acceleratorSynchronise(); \ acceleratorSynchronise(); \
MULT_2SPIN(DIR); \ MULT_2SPIN(DIR); \
RECON; RECON; }
#define HAND_STENCIL_LEGA(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEGA(PROJ,PERM,DIR,RECON) \
SE=&st_p[DIR+8*ss]; \ { SE=&st_p[DIR+8*ss]; \
ptype=st_perm[DIR]; \ auto ptype=st_perm[DIR]; \
/*SE=st.GetEntry(ptype,DIR,ss);*/ \ /*SE=st.GetEntry(ptype,DIR,ss);*/ \
offset = SE->_offset; \ auto offset = SE->_offset; \
perm = SE->_permute; \ auto perm = SE->_permute; \
LOAD_CHIMU(PERM); \ LOAD_CHIMU(PERM); \
PROJ; \ PROJ; \
MULT_2SPIN(DIR); \ MULT_2SPIN(DIR); \
RECON; RECON; }
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ { int ptype; \
offset = SE->_offset; \ SE=st.GetEntry(ptype,DIR,ss); \
local = SE->_is_local; \ auto offset = SE->_offset; \
perm = SE->_permute; \ auto local = SE->_is_local; \
if ( local ) { \ auto perm = SE->_permute; \
LOAD_CHIMU(PERM); \ if ( local ) { \
PROJ; \ LOAD_CHIMU(PERM); \
if ( perm) { \ PROJ; \
PERMUTE_DIR(PERM); \ if ( perm) { \
} \ PERMUTE_DIR(PERM); \
} else if ( st.same_node[DIR] ) { \ } \
LOAD_CHI; \ } else if ( st.same_node[DIR] ) { \
} \ LOAD_CHI; \
acceleratorSynchronise(); \ } \
if (local || st.same_node[DIR] ) { \ acceleratorSynchronise(); \
MULT_2SPIN(DIR); \ if (local || st.same_node[DIR] ) { \
RECON; \ MULT_2SPIN(DIR); \
} \ RECON; \
acceleratorSynchronise(); } \
acceleratorSynchronise(); }
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ { int ptype; \
offset = SE->_offset; \ SE=st.GetEntry(ptype,DIR,ss); \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \ auto offset = SE->_offset; \
LOAD_CHI; \ if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
MULT_2SPIN(DIR); \ LOAD_CHI; \
RECON; \ MULT_2SPIN(DIR); \
nmu++; \ RECON; \
} \ nmu++; \
acceleratorSynchronise(); } \
acceleratorSynchronise(); }
#define HAND_RESULT(ss) \ #define HAND_RESULT(ss) \
{ \ { \
SiteSpinor & ref (out[ss]); \ SiteSpinor & ref (out[ss]); \
coalescedWrite(ref()(0)(0),result_00,lane); \ coalescedWrite(ref()(0)(0),result_00,lane); \
coalescedWrite(ref()(0)(1),result_01,lane); \ coalescedWrite(ref()(0)(1),result_01,lane); \
coalescedWrite(ref()(0)(2),result_02,lane); \ coalescedWrite(ref()(0)(2),result_02,lane); \
@ -563,7 +566,6 @@ WilsonKernels<Impl>::HandDhopSiteSycl(StencilVector st_perm,StencilEntry *st_p,
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON); HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM); HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
@ -593,9 +595,7 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON); HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM); HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM); HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
@ -623,8 +623,6 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
StencilEntry *SE; StencilEntry *SE;
int offset,local,perm, ptype;
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON); HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM); HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM); HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
@ -640,8 +638,8 @@ template<class Impl> accelerator_inline void
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -652,7 +650,6 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
ZERO_RESULT; ZERO_RESULT;
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM); HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
@ -670,8 +667,8 @@ template<class Impl> accelerator_inline
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt; typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt;
@ -682,7 +679,6 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
StencilEntry *SE; StencilEntry *SE;
int offset,local,perm, ptype;
ZERO_RESULT; ZERO_RESULT;
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM); HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM); HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
@ -699,8 +695,8 @@ template<class Impl> accelerator_inline void
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -711,7 +707,7 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset, ptype; // int offset, ptype;
StencilEntry *SE; StencilEntry *SE;
int nmu=0; int nmu=0;
ZERO_RESULT; ZERO_RESULT;
@ -730,8 +726,8 @@ template<class Impl> accelerator_inline
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt; typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt;
@ -742,7 +738,7 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldVi
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
StencilEntry *SE; StencilEntry *SE;
int offset, ptype; // int offset, ptype;
int nmu=0; int nmu=0;
ZERO_RESULT; ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM); HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);

View File

@@ -78,6 +78,8 @@ public:
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
+ typedef SU<Nrepresentation> Group;
// Guido: we can probably separate the types from the HMC functions
// this will create 2 kind of implementations
// probably confusing the users
@@ -118,7 +120,7 @@ public:
LinkField Pmu(P.Grid());
Pmu = Zero();
for (int mu = 0; mu < Nd; mu++) {
- SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
+ Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ;
Pmu = Pmu*scale;
PokeIndex<LorentzIndex>(P, Pmu, mu);
@@ -159,15 +161,15 @@ public:
}
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
- SU<Nc>::HotConfiguration(pRNG, U);
+ Group::HotConfiguration(pRNG, U);
}
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
- SU<Nc>::TepidConfiguration(pRNG, U);
+ Group::TepidConfiguration(pRNG, U);
}
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
- SU<Nc>::ColdConfiguration(pRNG, U);
+ Group::ColdConfiguration(pRNG, U);
}
};

View File

@@ -1,61 +1,63 @@
- Using HMC in Grid version 0.5.1
+ # Using HMC in Grid
- These are the instructions to use the Generalised HMC on Grid version 0.5.1.
+ These are the instructions to use the Generalised HMC on Grid as of commit `749b802`.
- Disclaimer: GRID is still under active development so any information here can be changed in future releases.
+ Disclaimer: Grid is still under active development so any information here can be changed in future releases.
- Command line options
- ===================
+ ## Command line options
- (relevant file GenericHMCrunner.h)
+ (relevant file `GenericHMCrunner.h`)
The initial configuration can be changed at the command line using
- --StartType <your choice>
- valid choices, one among these
- HotStart, ColdStart, TepidStart, CheckpointStart
- default: HotStart
+ `--StartingType STARTING_TYPE`, where `STARTING_TYPE` is one of
+ `HotStart`, `ColdStart`, `TepidStart`, and `CheckpointStart`.
+ Default: `--StartingType HotStart`
- example
- ./My_hmc_exec --StartType HotStart
+ Example:
+ ```
+ ./My_hmc_exec --StartingType HotStart
+ ```
- The CheckpointStart option uses the prefix for the configurations and rng seed files defined in your executable and the initial configuration is specified by
- --StartTrajectory <integer>
- default: 0
+ The `CheckpointStart` option uses the prefix for the configurations and rng seed files defined in your executable and the initial configuration is specified by
+ `--StartingTrajectory STARTING_TRAJECTORY`, where `STARTING_TRAJECTORY` is an integer.
+ Default: `--StartingTrajectory 0`
The number of trajectories for a specific run are specified at command line by
- --Trajectories <integer>
- default: 1
+ `--Trajectories TRAJECTORIES`, where `TRAJECTORIES` is an integer.
+ Default: `--Trajectories 1`
The number of thermalization steps (i.e. steps when the Metropolis acceptance check is turned off) is specified by
- --Thermalizations <integer>
- default: 10
+ `--Thermalizations THERMALIZATIONS`, where `THERMALIZATIONS` is an integer.
+ Default: `--Thermalizations 10`
Any other parameter is defined in the source for the executable.
- HMC controls
- ===========
+ ## HMC controls
The lines
+ ```
std::vector<int> SerSeed({1, 2, 3, 4, 5});
std::vector<int> ParSeed({6, 7, 8, 9, 10});
+ ```
define the seeds for the serial and the parallel RNG.
The line
+ ```
TheHMC.MDparameters.set(20, 1.0);// MDsteps, traj length
+ ```
declares the number of molecular dynamics steps and the total trajectory length.
- Actions
- ======
- Action names are defined in the file
- lib/qcd/Actions.h
- Gauge actions list:
+ ## Actions
+ Action names are defined in the directory `Grid/qcd/action`.
+ Gauge actions list (from `Grid/qcd/action/gauge/Gauge.h`):
+ ```
WilsonGaugeActionR;
WilsonGaugeActionF;
WilsonGaugeActionD;
@@ -68,8 +70,9 @@ IwasakiGaugeActionD;
SymanzikGaugeActionR;
SymanzikGaugeActionF;
SymanzikGaugeActionD;
+ ```
+ ```
ConjugateWilsonGaugeActionR;
ConjugateWilsonGaugeActionF;
ConjugateWilsonGaugeActionD;
@@ -82,26 +85,23 @@ ConjugateIwasakiGaugeActionD;
ConjugateSymanzikGaugeActionR;
ConjugateSymanzikGaugeActionF;
ConjugateSymanzikGaugeActionD;
+ ```
+ Each of these action accepts one single parameter at creation time (beta).
+ Example for creating a Symanzik action with beta=4.0
+ ```
+ SymanzikGaugeActionR(4.0)
+ ```
+ Scalar actions list (from `Grid/qcd/action/scalar/Scalar.h`):
+ ```
ScalarActionR;
ScalarActionF;
ScalarActionD;
+ ```
+ The suffixes `R`, `F`, `D` in the action names refer to the `Real`
+ (the precision is defined at compile time by the `--enable-precision` flag in the configure),
+ `Float` and `Double`, that force the precision of the action to be 32, 64 bit respectively.
- each of these action accept one single parameter at creation time (beta).
- Example for creating a Symanzik action with beta=4.0
- SymanzikGaugeActionR(4.0)
- The suffixes R,F,D in the action names refer to the Real
- (the precision is defined at compile time by the --enable-precision flag in the configure),
- Float and Double, that force the precision of the action to be 32, 64 bit respectively.
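To see how the command line options and the in-source controls above fit together, here is a minimal sketch of an HMC executable. Only the names quoted in this README (`GenericHMCrunner.h`, `SerSeed`/`ParSeed`, `TheHMC.MDparameters.set`, `WilsonGaugeActionR`, the `--Starting*` flags) are taken from the documentation; the runner type and the setter calls marked as assumed below differ between Grid versions, so treat this as an outline rather than a drop-in program.

```cpp
// Illustrative outline only: calls marked "assumed" are not guaranteed to match
// the runner interface of any particular Grid release.
#include <Grid/Grid.h>
using namespace Grid;

int main(int argc, char **argv) {
  Grid_init(&argc, &argv);

  GenericHMCRunner TheHMC;                     // assumed runner type (GenericHMCrunner.h)

  std::vector<int> SerSeed({1, 2, 3, 4, 5});   // serial RNG seeds
  std::vector<int> ParSeed({6, 7, 8, 9, 10});  // parallel RNG seeds
  TheHMC.RNGSeeds(SerSeed, ParSeed);           // assumed seed setter

  WilsonGaugeActionR Waction(5.6);             // gauge action, single parameter beta
  TheHMC.TheAction.push_back(&Waction);        // assumed action registration

  TheHMC.MDparameters.set(20, 1.0);            // MDsteps, trajectory length

  // StartingType, StartingTrajectory, Trajectories and Thermalizations are
  // read from the command line, e.g.
  //   ./My_hmc_exec --StartingType CheckpointStart --StartingTrajectory 100 \
  //                 --Trajectories 50 --Thermalizations 0
  TheHMC.Run();                                // assumed entry point

  Grid_finalize();
  return 0;
}
```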

View File

@@ -322,8 +322,8 @@ public:
int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ;
- int recv_from_rank;
- int xmit_to_rank;
+ // int recv_from_rank;
+ // int xmit_to_rank;
if ( ! comm_dim ) return 1;
if ( displacement == 0 ) return 1;

View File

@@ -47,20 +47,20 @@ NAMESPACE_BEGIN(Grid);
class TypePair {
public:
T _internal[2];
- TypePair<T>& operator=(const Grid::Zero& o) {
+ accelerator TypePair<T>& operator=(const Grid::Zero& o) {
_internal[0] = Zero();
_internal[1] = Zero();
return *this;
}
- TypePair<T> operator+(const TypePair<T>& o) const {
+ accelerator TypePair<T> operator+(const TypePair<T>& o) const {
TypePair<T> r;
r._internal[0] = _internal[0] + o._internal[0];
r._internal[1] = _internal[1] + o._internal[1];
return r;
}
- TypePair<T>& operator+=(const TypePair<T>& o) {
+ accelerator TypePair<T>& operator+=(const TypePair<T>& o) {
_internal[0] += o._internal[0];
_internal[1] += o._internal[1];
return *this;

View File

@@ -95,7 +95,7 @@ void acceleratorInit(void)
#endif
cudaSetDevice(device);
+ cudaStreamCreate(&copyStream);
const int len=64;
char busid[len];
if( rank == world_rank ) {

View File

@@ -95,6 +95,7 @@ void acceleratorInit(void);
//////////////////////////////////////////////
#ifdef GRID_CUDA
#include <cuda.h>
#ifdef __CUDA_ARCH__
@@ -133,11 +134,7 @@ inline void cuda_mem(void)
}; \
dim3 cu_threads(nsimd,acceleratorThreads(),1); \
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
- std::cout << "========================== CUDA KERNEL CALL\n"; \
- cuda_mem(); \
LambdaApply<<<cu_blocks,cu_threads>>>(num1,num2,nsimd,lambda); \
- cuda_mem(); \
- std::cout << "========================== CUDA KERNEL DONE\n"; \
}
#define accelerator_for6dNB(iter1, num1, \

View File

@@ -88,7 +88,7 @@ public:
// Coordinate class, maxdims = 8 for now.
////////////////////////////////////////////////////////////////
#define GRID_MAX_LATTICE_DIMENSION (8)
- #define GRID_MAX_SIMD (16)
+ #define GRID_MAX_SIMD (32)
static constexpr int MaxDims = GRID_MAX_LATTICE_DIMENSION;

View File

@@ -137,7 +137,7 @@ int main (int argc, char ** argv)
Eigen::MatrixXd mean(nVol, 4), stdDev(nVol, 4), rob(nVol, 4);
Eigen::VectorXd avMean(4), avStdDev(4), avRob(4);
- double n = BENCH_IO_NPASS;
+ // double n = BENCH_IO_NPASS;
stats(mean, stdDev, perf);
stats(avMean, avStdDev, avPerf);
@@ -164,7 +164,7 @@ int main (int argc, char ** argv)
mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
}
MSG << std::endl;
- MSG << "Robustness of individual results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
+ MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" << std::endl;
MSG << std::endl;
grid_printf("%4s %12s %12s %12s %12s\n",
"L", "std read", "std write", "Grid read", "Grid write");
@@ -185,7 +185,7 @@ int main (int argc, char ** argv)
avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
MSG << std::endl;
- MSG << "Robustness of volume-averaged results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
+ MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" << std::endl;
MSG << std::endl;
grid_printf("%12s %12s %12s %12s\n",
"std read", "std write", "Grid read", "Grid write");

View File

@@ -142,7 +142,7 @@ public:
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
- int ncomm;
+ // int ncomm;
double dbytes;
for(int dir=0;dir<8;dir++) {
@@ -290,7 +290,7 @@ public:
LatticeSU4 z(&Grid); z=Zero();
LatticeSU4 x(&Grid); x=Zero();
LatticeSU4 y(&Grid); y=Zero();
- double a=2.0;
+ // double a=2.0;
uint64_t Nloop=NLOOP;

View File

@@ -72,7 +72,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
std::vector<double> t_time(Nloop);
- time_statistics timestat;
+ // time_statistics timestat;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange from host memory "<<std::endl;

View File

@@ -184,8 +184,10 @@ int main (int argc, char ** argv)
double bytes=1.0*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
- std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
+ std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"
+ <<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"
+ <<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
+ assert(nn==nn);
}
Grid_finalize();

View File

@@ -4,7 +4,7 @@ using namespace Grid;
template<class Field>
void SimpleConjugateGradient(LinearOperatorBase<Field> &HPDop,const Field &b, Field &x)
{
- RealD cp, c, alpha, d, beta, ssq, qq;
+ RealD cp, c, alpha, d, beta, ssq;
RealD Tolerance=1.0e-10;
int MaxIterations=10000;

View File

@ -0,0 +1,539 @@
/*
* Warning: This code illustrative only: not well tested, and not meant for production use
* without regression / tests being applied
*/
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
typedef SpinColourMatrix Propagator;
typedef SpinColourVector Fermion;
typedef PeriodicGimplR GimplR;
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
GridBase *grid;
GaugeField U;
CovariantLaplacianCshift(GaugeField &_U) :
grid(_U.Grid()),
U(_U) { };
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &in, Field &out)
{
out=Zero();
for(int mu=0;mu<Nd-1;mu++) {
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficient
out = out - Gimpl::CovShiftForward(Umu,mu,in);
out = out - Gimpl::CovShiftBackward(Umu,mu,in);
out = out + 2.0*in;
}
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
void MakePhase(Coordinate mom,LatticeComplex &phase)
{
GridBase *grid = phase.Grid();
auto latt_size = grid->GlobalDimensions();
ComplexD ci(0.0,1.0);
phase=Zero();
LatticeComplex coor(phase.Grid());
for(int mu=0;mu<Nd;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
phase = phase + (TwoPiL * mom[mu]) * coor;
}
phase = exp(phase*ci);
}
void LinkSmear(int nstep, RealD rho,LatticeGaugeField &Uin,LatticeGaugeField &Usmr)
{
Smear_Stout<GimplR> Stout(rho);
LatticeGaugeField Utmp(Uin.Grid());
Utmp = Uin;
for(int i=0;i<nstep;i++){
Stout.smear(Usmr,Utmp);
Utmp = Usmr;
}
}
void PointSource(Coordinate &coor,LatticePropagator &source)
{
// Coordinate coor({0,0,0,0});
source=Zero();
SpinColourMatrix kronecker; kronecker=1.0;
pokeSite(kronecker,source,coor);
}
void GFWallSource(int tslice,LatticePropagator &source)
{
GridBase *grid = source.Grid();
LatticeComplex one(grid); one = ComplexD(1.0,0.0);
LatticeComplex zz(grid); zz=Zero();
LatticeInteger t(grid);
LatticeCoordinate(t,Tdir);
one = where(t==Integer(tslice), one, zz);
source = 1.0;
source = source * one;
}
void Z2WallSource(GridParallelRNG &RNG,int tslice,LatticePropagator &source)
{
GridBase *grid = source.Grid();
LatticeComplex noise(grid);
LatticeComplex zz(grid); zz=Zero();
LatticeInteger t(grid);
RealD nrm=1.0/sqrt(2);
bernoulli(RNG, noise); // 0,1 50:50
noise = (2.*noise - Complex(1,1))*nrm;
LatticeCoordinate(t,Tdir);
noise = where(t==Integer(tslice), noise, zz);
source = 1.0;
source = source*noise;
std::cout << " Z2 wall " << norm2(source) << std::endl;
}
void GaugeFix(LatticeGaugeField &U,LatticeGaugeField &Ufix)
{
Real alpha=0.05;
Real plaq=WilsonLoops<GimplR>::avgPlaquette(U);
std::cout << " Initial plaquette "<<plaq << std::endl;
LatticeColourMatrix xform(U.Grid());
Ufix = U;
int orthog=Nd-1;
FourierAcceleratedGaugeFixer<GimplR>::SteepestDescentGaugeFix(Ufix,xform,alpha,100000,1.0e-14, 1.0e-14,true,orthog);
plaq=WilsonLoops<GimplR>::avgPlaquette(Ufix);
std::cout << " Final plaquette "<<plaq << std::endl;
}
template<class Field>
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
{
typedef CovariantLaplacianCshift <GimplR,Field> Laplacian_t;
Laplacian_t Laplacian(U);
Integer Iterations = 40;
Real width = 2.0;
Real coeff = (width*width) / Real(4*Iterations);
Field tmp(U.Grid());
smeared=unsmeared;
// chi = (1-p^2/2N)^N kronecker
for(int n = 0; n < Iterations; ++n) {
Laplacian.M(smeared,tmp);
smeared = smeared - coeff*tmp;
std::cout << " smear iter " << n<<" " <<norm2(smeared)<<std::endl;
}
}
void GaussianSource(Coordinate &site,LatticeGaugeField &U,LatticePropagator &source)
{
LatticePropagator tmp(source.Grid());
PointSource(site,source);
std::cout << " GaussianSource Kronecker "<< norm2(source)<<std::endl;
tmp = source;
GaussianSmear(U,tmp,source);
std::cout << " GaussianSource Smeared "<< norm2(source)<<std::endl;
}
void GaussianWallSource(GridParallelRNG &RNG,int tslice,LatticeGaugeField &U,LatticePropagator &source)
{
Z2WallSource(RNG,tslice,source);
auto tmp = source;
GaussianSmear(U,tmp,source);
}
void SequentialSource(int tslice,Coordinate &mom,LatticePropagator &spectator,LatticePropagator &source)
{
assert(mom.size()==Nd);
assert(mom[Tdir] == 0);
GridBase * grid = spectator.Grid();
LatticeInteger ts(grid);
LatticeCoordinate(ts,Tdir);
source = Zero();
source = where(ts==Integer(tslice),spectator,source); // Stick in a slice of the spectator, zero everywhere else
LatticeComplex phase(grid);
MakePhase(mom,phase);
source = source *phase;
}
template<class Action>
void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
{
GridBase *UGrid = D.GaugeGrid();
GridBase *FGrid = D.FermionGrid();
LatticeFermion src4 (UGrid);
LatticeFermion src5 (FGrid);
LatticeFermion result5(FGrid);
LatticeFermion result4(UGrid);
ConjugateGradient<LatticeFermion> CG(1.0e-12,100000);
SchurRedBlackDiagTwoSolve<LatticeFermion> schur(CG);
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if have eigenvectors
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
PropToFerm<Action>(src4,source,s,c);
D.ImportPhysicalFermionSource(src4,src5);
result5=Zero();
schur(D,src5,result5,ZG);
std::cout<<GridLogMessage
<<"spin "<<s<<" color "<<c
<<" norm2(src5d) " <<norm2(src5)
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
D.ExportPhysicalFermionSolution(result5,result4);
FermToProp<Action>(propagator,result4,s,c);
}
}
}
class MesonFile: Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFile, std::vector<std::vector<Complex> >, data);
};
void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
{
const int nchannel=4;
Gamma::Algebra Gammas[nchannel][2] = {
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::Gamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::GammaTGamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::Gamma5},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaTGamma5}
};
Gamma G5(Gamma::Algebra::Gamma5);
LatticeComplex meson_CF(q1.Grid());
MesonFile MF;
for(int ch=0;ch<nchannel;ch++){
Gamma Gsrc(Gammas[ch][0]);
Gamma Gsnk(Gammas[ch][1]);
meson_CF = trace(G5*adj(q1)*G5*Gsnk*q2*adj(Gsrc));
std::vector<TComplex> meson_T;
sliceSum(meson_CF,meson_T, Tdir);
int nt=meson_T.size();
std::vector<Complex> corr(nt);
for(int t=0;t<nt;t++){
corr[t] = TensorRemove(meson_T[t]); // Yes this is ugly, not figured a work around
std::cout << " channel "<<ch<<" t "<<t<<" " <<corr[t]<<std::endl;
}
MF.data.push_back(corr);
}
{
XmlWriter WR(file);
write(WR,"MesonFile",MF);
}
}
void Meson3pt(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
{
const int nchannel=4;
Gamma::Algebra Gammas[nchannel][2] = {
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaX},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaY},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaZ},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaT}
};
Gamma G5(Gamma::Algebra::Gamma5);
LatticeComplex meson_CF(q1.Grid());
MesonFile MF;
for(int ch=0;ch<nchannel;ch++){
Gamma Gsrc(Gammas[ch][0]);
Gamma Gsnk(Gammas[ch][1]);
meson_CF = trace(G5*adj(q1)*G5*Gsnk*q2*adj(Gsrc));
std::vector<TComplex> meson_T;
sliceSum(meson_CF,meson_T, Tdir);
int nt=meson_T.size();
std::vector<Complex> corr(nt);
for(int t=0;t<nt;t++){
corr[t] = TensorRemove(meson_T[t]); // Yes this is ugly, not figured a work around
std::cout << " channel "<<ch<<" t "<<t<<" " <<corr[t]<<std::endl;
}
MF.data.push_back(corr);
}
{
XmlWriter WR(file);
write(WR,"MesonFile",MF);
}
}
void WallSinkMesonTrace(std::string file,std::vector<Propagator> &q1,std::vector<Propagator> &q2)
{
const int nchannel=4;
Gamma::Algebra Gammas[nchannel][2] = {
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::Gamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::GammaTGamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::Gamma5},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaTGamma5}
};
Gamma G5(Gamma::Algebra::Gamma5);
int nt=q1.size();
std::vector<Complex> meson_CF(nt);
MesonFile MF;
for(int ch=0;ch<nchannel;ch++){
Gamma Gsrc(Gammas[ch][0]);
Gamma Gsnk(Gammas[ch][1]);
std::vector<Complex> corr(nt);
for(int t=0;t<nt;t++){
meson_CF[t] = trace(G5*adj(q1[t])*G5*Gsnk*q2[t]*adj(Gsrc));
corr[t] = TensorRemove(meson_CF[t]); // Yes this is ugly, not figured a work around
std::cout << " channel "<<ch<<" t "<<t<<" " <<corr[t]<<std::endl;
}
MF.data.push_back(corr);
}
{
XmlWriter WR(file);
write(WR,"MesonFile",MF);
}
}
int make_idx(int p, int m,int nmom)
{
if (m==0) return p;
assert(p==0);
return nmom + m - 1;
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
// Double precision grids
auto latt = GridDefaultLatt();
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
LatticeGaugeField Umu(UGrid);
LatticeGaugeField Utmp(UGrid);
LatticeGaugeField Usmr(UGrid);
std::string config;
if( argc > 1 && argv[1][0] != '-' )
{
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
FieldMetaData header;
NerscIO::readConfiguration(Umu, header, argv[1]);
config=argv[1];
}
else
{
std::cout<<GridLogMessage <<"Using hot configuration"<<std::endl;
SU<Nc>::ColdConfiguration(Umu);
config="ColdConfig";
}
// GaugeFix(Umu,Utmp);
// Umu=Utmp;
int nsmr=3;
RealD rho=0.1;
LinkSmear(nsmr,rho,Umu,Usmr);
std::vector<int> smeared_link({ 0,0,1} );
std::vector<RealD> masses({ 0.004,0.02477,0.447} ); // u/d, s, c ??
std::vector<RealD> M5s ({ 1.8,1.8,1.0} );
std::vector<RealD> bs ({ 1.0,1.0,1.5} ); // DDM
std::vector<RealD> cs ({ 0.0,0.0,0.5} ); // DDM
std::vector<int> Ls_s ({ 16,16,12} );
std::vector<GridCartesian *> FGrids;
std::vector<GridRedBlackCartesian *> FrbGrids;
std::vector<Coordinate> momenta;
momenta.push_back(Coordinate({0,0,0,0}));
momenta.push_back(Coordinate({1,0,0,0}));
momenta.push_back(Coordinate({2,0,0,0}));
int nmass = masses.size();
int nmom = momenta.size();
std::vector<MobiusFermionR *> FermActs;
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"MobiusFermion action as Scaled Shamir kernel"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::vector<Complex> boundary = {1,1,1,-1};
typedef MobiusFermionR FermionAction;
FermionAction::ImplParams Params(boundary);
for(int m=0;m<masses.size();m++) {
RealD mass = masses[m];
RealD M5 = M5s[m];
RealD b = bs[m];
RealD c = cs[m];
int Ls = Ls_s[m];
if ( smeared_link[m] ) Utmp = Usmr;
else Utmp = Umu;
FGrids.push_back(SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid));
FrbGrids.push_back(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid));
FermActs.push_back(new MobiusFermionR(Utmp,*FGrids[m],*FrbGrids[m],*UGrid,*UrbGrid,mass,M5,b,c,Params));
}
LatticePropagator z2wall_source(UGrid);
LatticePropagator gfwall_source(UGrid);
LatticePropagator phased_prop(UGrid);
int tslice = 0;
int tseq=(tslice+16)%latt[Nd-1];
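// The sequential (current) insertion timeslice sits 16 slices after the wall source,
// wrapped around the temporal extent of the lattice.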
//////////////////////////////////////////////////////////////////////
// RNG seeded for Z2 wall
//////////////////////////////////////////////////////////////////////
// You can manage seeds however you like.
// Recommend SeedUniqueString.
//////////////////////////////////////////////////////////////////////
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString("Study2-Source_Z2_p_0_0_0_t_0-880");
Z2WallSource (RNG4,tslice,z2wall_source);
GFWallSource (tslice,gfwall_source);
std::vector<LatticeComplex> phase(nmom,UGrid);
for(int m=0;m<nmom;m++){
MakePhase(momenta[m],phase[m]);
}
std::vector<LatticePropagator> Z2Props (nmom+nmass-1,UGrid);
std::vector<LatticePropagator> GFProps (nmom+nmass-1,UGrid);
for(int p=0;p<nmom;p++) {
int m=0;
int idx = make_idx(p,m,nmom);
phased_prop = z2wall_source * phase[p];
Solve(*FermActs[m],phased_prop ,Z2Props[idx]);
phased_prop = gfwall_source * phase[p];
Solve(*FermActs[m],phased_prop ,GFProps[idx]);
}
for(int m=1;m<nmass;m++) {
int p=0;
int idx = make_idx(p,m,nmom);
phased_prop = z2wall_source;
Solve(*FermActs[m],phased_prop ,Z2Props[idx]);
phased_prop = gfwall_source;
Solve(*FermActs[m],phased_prop ,GFProps[idx]);
}
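// Summary of the two loops above: the lightest mass is solved at every momentum, while
// each heavier mass is solved at zero momentum only; no other (p,m) combination is
// needed by the contractions below.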
std::vector<std::vector<Propagator> > wsnk_z2Props(nmom+nmass-1);
std::vector<std::vector<Propagator> > wsnk_gfProps(nmom+nmass-1);
// Non-zero kaon and point and D two point
// WW stick momentum on m1 (lighter)
// zero momentum on m2
for(int m1=0;m1<nmass;m1++) {
for(int m2=m1;m2<nmass;m2++) {
int pmax = (m1==0)? nmom:1;
for(int p=0;p<pmax;p++){
std::stringstream ssg,ssz;
std::stringstream wssg,wssz;
int idx1 = make_idx(p,m1,nmom);
int idx2 = make_idx(0,m2,nmom);
/// Point sinks
ssg<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_p_gf_meson.xml";
ssz<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_p_z2_meson.xml";
MesonTrace(ssz.str(),Z2Props[idx1],Z2Props[idx2],phase[p]); // Q1 is conjugated
MesonTrace(ssg.str(),GFProps[idx1],GFProps[idx2],phase[p]);
/// Wall sinks
wssg<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_w_gf_meson.xml";
wssz<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_w_z2_meson.xml";
phased_prop = GFProps[idx2] * phase[p];
sliceSum(phased_prop,wsnk_gfProps[m1],Tdir);
sliceSum(GFProps[idx1],wsnk_gfProps[m2],Tdir);
WallSinkMesonTrace(wssg.str(),wsnk_gfProps[m1],wsnk_gfProps[m2]);
phased_prop = Z2Props[idx2] * phase[p];
sliceSum(phased_prop,wsnk_z2Props[m1],Tdir);
sliceSum(Z2Props[idx1],wsnk_z2Props[m2],Tdir);
WallSinkMesonTrace(wssz.str(),wsnk_z2Props[m1],wsnk_z2Props[m2]);
}
}}
/////////////////////////////////////
// Sequential solves
/////////////////////////////////////
LatticePropagator seq_wsnk_z2src(UGrid);
LatticePropagator seq_wsnk_gfsrc(UGrid);
LatticePropagator seq_psnk_z2src(UGrid);
LatticePropagator seq_psnk_gfsrc(UGrid);
LatticePropagator source(UGrid);
for(int m=0;m<nmass-1;m++){
int spect_idx = make_idx(0,m,nmom);
int charm=nmass-1;
SequentialSource(tseq,momenta[0],GFProps[spect_idx],source);
Solve(*FermActs[charm],source,seq_psnk_gfsrc);
SequentialSource(tseq,momenta[0],Z2Props[spect_idx],source);
Solve(*FermActs[charm],source,seq_psnk_z2src);
// Todo need wall sequential solve
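// The two solves above build sequential charm propagators: SequentialSource takes the
// zero-momentum spectator propagator at the insertion timeslice tseq as a new source
// (its conventions are defined earlier in this file), and the charm action is inverted
// on it, so Meson3pt below contracts a spectator leg with a charm leg that carries the
// current insertion.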
for(int p=0;p<nmom;p++){
int active_idx = make_idx(p,0,nmom);
std::stringstream seq_3pt_p_z2;
std::stringstream seq_3pt_p_gf;
std::stringstream seq_3pt_w_z2;
std::stringstream seq_3pt_w_gf;
seq_3pt_p_z2 <<config<<"_3pt_p"<<p<< "_m" << m << "_p_z2_meson.xml";
seq_3pt_p_gf <<config<<"_3pt_p"<<p<< "_m" << m << "_p_gf_meson.xml";
seq_3pt_w_z2 <<config<<"_3pt_p"<<p<< "_m" << m << "_w_z2_meson.xml";
seq_3pt_w_gf <<config<<"_3pt_p"<<p<< "_m" << m << "_w_gf_meson.xml";
Meson3pt(seq_3pt_p_gf.str(),GFProps[active_idx],seq_psnk_gfsrc,phase[p]);
Meson3pt(seq_3pt_p_z2.str(),Z2Props[active_idx],seq_psnk_z2src,phase[p]);
}
}
Grid_finalize();
}

View File

@ -9,6 +9,7 @@ using namespace std;
using namespace Grid;
typedef SpinColourMatrix Propagator;
typedef SpinColourVector Fermion;
+typedef PeriodicGimplR GimplR;
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
@ -55,6 +56,16 @@ void MakePhase(Coordinate mom,LatticeComplex &phase)
}
phase = exp(phase*ci);
}
+void LinkSmear(int nstep, RealD rho,LatticeGaugeField &Uin,LatticeGaugeField &Usmr)
+{
+Smear_Stout<GimplR> Stout(rho);
+LatticeGaugeField Utmp(Uin.Grid());
+Utmp = Uin;
+for(int i=0;i<nstep;i++){
+Stout.smear(Usmr,Utmp);
+Utmp = Usmr;
+}
+}
void PointSource(Coordinate &coor,LatticePropagator &source)
{
// Coordinate coor({0,0,0,0});
@ -97,23 +108,23 @@ void GaugeFix(LatticeGaugeField &U,LatticeGaugeField &Ufix)
{
Real alpha=0.05;
-Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(U);
+Real plaq=WilsonLoops<GimplR>::avgPlaquette(U);
std::cout << " Initial plaquette "<<plaq << std::endl;
LatticeColourMatrix xform(U.Grid());
Ufix = U;
int orthog=Nd-1;
-FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Ufix,xform,alpha,10000,1.0e-12, 1.0e-12,true,orthog);
+FourierAcceleratedGaugeFixer<GimplR>::SteepestDescentGaugeFix(Ufix,xform,alpha,100000,1.0e-14, 1.0e-14,true,orthog);
-plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Ufix);
+plaq=WilsonLoops<GimplR>::avgPlaquette(Ufix);
std::cout << " Final plaquette "<<plaq << std::endl;
}
template<class Field>
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
{
-typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
+typedef CovariantLaplacianCshift <GimplR,Field> Laplacian_t;
Laplacian_t Laplacian(U);
Integer Iterations = 40;
@ -167,19 +178,21 @@ void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
GridBase *UGrid = D.GaugeGrid();
GridBase *FGrid = D.FermionGrid();
-LatticeFermion src4 (UGrid);
+LatticeFermion src4 (UGrid); src4 = Zero();
LatticeFermion src5 (FGrid);
LatticeFermion result5(FGrid);
LatticeFermion result4(UGrid);
-ConjugateGradient<LatticeFermion> CG(1.0e-8,100000);
+ConjugateGradient<LatticeFermion> CG(1.0e-12,100000);
-SchurRedBlackDiagMooeeSolve<LatticeFermion> schur(CG);
+SchurRedBlackDiagTwoSolve<LatticeFermion> schur(CG);
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if have eigenvectors
+std::cout<<GridLogMessage<< " source4 "<<norm2(source)<<std::endl;
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
PropToFerm<Action>(src4,source,s,c);
+std::cout<<GridLogMessage<< s<<c<<" src4 "<<norm2(src4)<<std::endl;
D.ImportPhysicalFermionSource(src4,src5);
+std::cout<<GridLogMessage<< s<<c<<" src5 "<<norm2(src5)<<std::endl;
result5=Zero();
schur(D,src5,result5,ZG);
@ -287,15 +300,10 @@ int main (int argc, char ** argv)
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
-//////////////////////////////////////////////////////////////////////
-// You can manage seeds however you like.
-// Recommend SeedUniqueString.
-//////////////////////////////////////////////////////////////////////
-std::vector<int> seeds4({1,2,3,4});
-GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
-LatticeGaugeField Ufixed(UGrid);
+LatticeGaugeField Utmp(UGrid);
+LatticeGaugeField Usmr(UGrid);
std::string config;
if( argc > 1 && argv[1][0] != '-' )
{
@ -308,13 +316,20 @@ int main (int argc, char ** argv)
{
std::cout<<GridLogMessage <<"Using hot configuration"<<std::endl;
SU<Nc>::ColdConfiguration(Umu);
-// SU<Nc>::HotConfiguration(RNG4,Umu);
-config="HotConfig";
+config="ColdConfig";
}
-GaugeFix(Umu,Ufixed);
+// GaugeFix(Umu,Utmp);
-Umu=Ufixed;
+// Umu=Utmp;
+int nsmr=3;
+RealD rho=0.1;
+RealD plaq_gf =WilsonLoops<GimplR>::avgPlaquette(Umu);
+LinkSmear(nsmr,rho,Umu,Usmr);
+RealD plaq_smr=WilsonLoops<GimplR>::avgPlaquette(Usmr);
+std::cout << GridLogMessage << " GF Plaquette " <<plaq_gf<<std::endl;
+std::cout << GridLogMessage << " SM Plaquette " <<plaq_smr<<std::endl;
+std::vector<int> smeared_link({ 0,0,1} );
std::vector<RealD> masses({ 0.004,0.02477,0.447} ); // u/d, s, c ??
std::vector<RealD> M5s ({ 1.8,1.8,1.0} );
std::vector<RealD> bs ({ 1.0,1.0,1.5} ); // DDM
@ -330,6 +345,9 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"MobiusFermion action as Scaled Shamir kernel"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
+std::vector<Complex> boundary = {1,1,1,-1};
+typedef MobiusFermionR FermionAction;
+FermionAction::ImplParams Params(boundary);
for(int m=0;m<masses.size();m++) {
@ -339,30 +357,40 @@ int main (int argc, char ** argv)
RealD c = cs[m];
int Ls = Ls_s[m];
+if ( smeared_link[m] ) Utmp = Usmr;
+else Utmp = Umu;
FGrids.push_back(SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid));
FrbGrids.push_back(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid));
-FermActs.push_back(new MobiusFermionR(Umu,*FGrids[m],*FrbGrids[m],*UGrid,*UrbGrid,mass,M5,b,c));
+FermActs.push_back(new MobiusFermionR(Utmp,*FGrids[m],*FrbGrids[m],*UGrid,*UrbGrid,mass,M5,b,c,Params));
}
-LatticePropagator point_source(UGrid);
LatticePropagator z2wall_source(UGrid);
LatticePropagator gfwall_source(UGrid);
-Coordinate Origin({0,0,0,0});
-PointSource (Origin,point_source);
-Z2WallSource (RNG4,0,z2wall_source);
-GFWallSource (0,gfwall_source);
-std::vector<LatticePropagator> PointProps(nmass,UGrid);
-std::vector<LatticePropagator> GaussProps(nmass,UGrid);
+int tslice = 0;
+//////////////////////////////////////////////////////////////////////
+// RNG seeded for Z2 wall
+//////////////////////////////////////////////////////////////////////
+// You can manage seeds however you like.
+// Recommend SeedUniqueString.
+//////////////////////////////////////////////////////////////////////
+GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString("Study2-Source_Z2_p_0_0_0_t_0-880");
+Z2WallSource (RNG4,tslice,z2wall_source);
+GFWallSource (tslice,gfwall_source);
std::vector<LatticePropagator> Z2Props (nmass,UGrid);
std::vector<LatticePropagator> GFProps (nmass,UGrid);
for(int m=0;m<nmass;m++) {
+std::cout << GridLogMessage << " Mass " <<m << " z2wall source "<<norm2(z2wall_source)<<std::endl;
Solve(*FermActs[m],z2wall_source ,Z2Props[m]);
+std::cout << GridLogMessage << " Mass " <<m << " gfwall source "<<norm2(gfwall_source)<<std::endl;
Solve(*FermActs[m],gfwall_source ,GFProps[m]);
+std::cout << GridLogMessage << " Mass " <<m << " z2wall source "<<norm2(z2wall_source)<< " " << norm2(gfwall_source)<<std::endl;
}
@ -383,14 +411,15 @@ int main (int argc, char ** argv)
std::stringstream wssg,wssz;
/// Point sinks
-ssg<<config<< "_m" << m1 << "_m"<< m2 << "p_gf_meson.xml";
+ssg<<config<< "_m" << m1 << "_m"<< m2 << "_p_gf_meson.xml";
-ssz<<config<< "_m" << m1 << "_m"<< m2 << "p_z2_meson.xml";
+ssz<<config<< "_m" << m1 << "_m"<< m2 << "_p_z2_meson.xml";
MesonTrace(ssz.str(),Z2Props[m1],Z2Props[m2],phase);
+MesonTrace(ssg.str(),GFProps[m1],GFProps[m2],phase);
/// Wall sinks
-wssg<<config<< "_m" << m1 << "_m"<< m2 << "w_gf_meson.xml";
+wssg<<config<< "_m" << m1 << "_m"<< m2 << "_w_gf_meson.xml";
-wssz<<config<< "_m" << m1 << "_m"<< m2 << "w_z2_meson.xml";
+wssz<<config<< "_m" << m1 << "_m"<< m2 << "_w_z2_meson.xml";
WallSinkMesonTrace(wssg.str(),wsnk_gfProps[m1],wsnk_gfProps[m2]);
WallSinkMesonTrace(wssz.str(),wsnk_z2Props[m1],wsnk_z2Props[m2]);

systems/Summit/comms.4node Normal file
View File

@ -0,0 +1,179 @@
OPENMPI detected
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device Number : 0
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device identifier: Tesla V100-SXM2-16GB
AcceleratorCudaInit[0]: totalGlobalMem: 16911433728
AcceleratorCudaInit[0]: managedMemory: 1
AcceleratorCudaInit[0]: isMultiGpuBoard: 0
AcceleratorCudaInit[0]: warpSize: 32
AcceleratorCudaInit[0]: pciBusID: 4
AcceleratorCudaInit[0]: pciDeviceID: 0
AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
AcceleratorCudaInit: rank 0 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 0 device 0 bus id: 0004:04:00.0
AcceleratorCudaInit: ================================================
SharedMemoryMpi: World communicator of size 24
SharedMemoryMpi: Node communicator of size 6
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 1073741824bytes at 0x200060000000 for comms buffers
Setting up IPC
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|_ | | | | | | | | | | | | _|__
__|_ _|__
__|_ GGGG RRRR III DDDD _|__
__|_ G R R I D D _|__
__|_ G R R I D D _|__
__|_ G GG RRRR I D D _|__
__|_ G G R R I D D _|__
__|_ GGGG R R III DDDD _|__
__|_ _|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
| | | | | | | | | | | | | |
Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Current Grid git commit hash=7cb1ff7395a5833ded6526c43891bd07a0436290: (HEAD -> develop, origin/develop, origin/HEAD) clean
Grid : Message : ================================================
Grid : Message : MPI is initialised and logging filters activated
Grid : Message : ================================================
Grid : Message : Requested 1073741824 byte stencil comms buffers
AcceleratorCudaInit: rank 1 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 1 device 1 bus id: 0004:05:00.0
AcceleratorCudaInit: rank 2 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 2 device 2 bus id: 0004:06:00.0
AcceleratorCudaInit: rank 5 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 5 device 5 bus id: 0035:05:00.0
AcceleratorCudaInit: rank 4 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 4 device 4 bus id: 0035:04:00.0
AcceleratorCudaInit: rank 3 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 3 device 3 bus id: 0035:03:00.0
Grid : Message : MemoryManager Cache 13529146982 bytes
Grid : Message : MemoryManager::Init() setting up
Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
Grid : Message : MemoryManager::Init() Using cudaMalloc
Grid : Message : 2.137929 s : Grid is setup to use 6 threads
Grid : Message : 2.137941 s : Number of iterations to average: 250
Grid : Message : 2.137950 s : ====================================================================================================
Grid : Message : 2.137958 s : = Benchmarking sequential halo exchange from host memory
Grid : Message : 2.137966 s : ====================================================================================================
Grid : Message : 2.137974 s : L Ls bytes MB/s uni MB/s bidi
AcceleratorCudaInit: rank 22 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 10 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 15 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 21 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 20 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 7 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 9 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 11 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 8 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 6 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 19 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 23 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 18 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 12 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 16 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 13 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 14 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 17 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
Grid : Message : 2.604949 s : 8 8 393216 89973.9 179947.8
Grid : Message : 2.668249 s : 8 8 393216 18650.3 37300.5
Grid : Message : 2.732288 s : 8 8 393216 18428.5 36857.1
Grid : Message : 2.753565 s : 8 8 393216 55497.2 110994.4
Grid : Message : 2.808960 s : 12 8 1327104 100181.5 200363.0
Grid : Message : 3.226900 s : 12 8 1327104 20600.5 41201.0
Grid : Message : 3.167459 s : 12 8 1327104 24104.6 48209.2
Grid : Message : 3.227660 s : 12 8 1327104 66156.7 132313.5
Grid : Message : 3.413570 s : 16 8 3145728 56174.4 112348.8
Grid : Message : 3.802697 s : 16 8 3145728 24255.9 48511.7
Grid : Message : 4.190498 s : 16 8 3145728 24336.7 48673.4
Grid : Message : 4.385171 s : 16 8 3145728 48484.1 96968.2
Grid : Message : 4.805284 s : 20 8 6144000 46380.5 92761.1
Grid : Message : 5.562975 s : 20 8 6144000 24328.5 48656.9
Grid : Message : 6.322562 s : 20 8 6144000 24266.7 48533.4
Grid : Message : 6.773598 s : 20 8 6144000 40868.5 81736.9
Grid : Message : 7.600999 s : 24 8 10616832 40198.3 80396.6
Grid : Message : 8.912917 s : 24 8 10616832 24279.5 48559.1
Grid : Message : 10.220961 s : 24 8 10616832 24350.2 48700.4
Grid : Message : 11.728250 s : 24 8 10616832 37390.9 74781.8
Grid : Message : 12.497258 s : 28 8 16859136 36792.2 73584.5
Grid : Message : 14.585387 s : 28 8 16859136 24222.2 48444.3
Grid : Message : 16.664783 s : 28 8 16859136 24323.4 48646.8
Grid : Message : 17.955238 s : 28 8 16859136 39194.7 78389.4
Grid : Message : 20.136479 s : 32 8 25165824 35718.3 71436.5
Grid : Message : 23.241958 s : 32 8 25165824 24311.4 48622.9
Grid : Message : 26.344810 s : 32 8 25165824 24331.9 48663.7
Grid : Message : 28.384420 s : 32 8 25165824 37016.3 74032.7
Grid : Message : 28.388879 s : ====================================================================================================
Grid : Message : 28.388894 s : = Benchmarking sequential halo exchange from GPU memory
Grid : Message : 28.388909 s : ====================================================================================================
Grid : Message : 28.388924 s : L Ls bytes MB/s uni MB/s bidi
Grid : Message : 28.553993 s : 8 8 393216 8272.4 16544.7
Grid : Message : 28.679592 s : 8 8 393216 9395.4 18790.8
Grid : Message : 28.811112 s : 8 8 393216 8971.0 17942.0
Grid : Message : 28.843770 s : 8 8 393216 36145.6 72291.2
Grid : Message : 28.981754 s : 12 8 1327104 49591.6 99183.2
Grid : Message : 29.299764 s : 12 8 1327104 12520.8 25041.7
Grid : Message : 29.620288 s : 12 8 1327104 12422.2 24844.4
Grid : Message : 29.657645 s : 12 8 1327104 106637.5 213275.1
Grid : Message : 29.952933 s : 16 8 3145728 43939.2 87878.5
Grid : Message : 30.585411 s : 16 8 3145728 14922.1 29844.2
Grid : Message : 31.219781 s : 16 8 3145728 14877.2 29754.4
Grid : Message : 31.285017 s : 16 8 3145728 144724.3 289448.7
Grid : Message : 31.706443 s : 20 8 6144000 54676.2 109352.4
Grid : Message : 32.739205 s : 20 8 6144000 17848.0 35696.1
Grid : Message : 33.771852 s : 20 8 6144000 17849.9 35699.7
Grid : Message : 33.871981 s : 20 8 6144000 184141.4 368282.8
Grid : Message : 34.536808 s : 24 8 10616832 55784.3 111568.6
Grid : Message : 36.275648 s : 24 8 10616832 18317.6 36635.3
Grid : Message : 37.997181 s : 24 8 10616832 18501.7 37003.4
Grid : Message : 38.140442 s : 24 8 10616832 222383.9 444767.9
Grid : Message : 39.177222 s : 28 8 16859136 56609.7 113219.4
Grid : Message : 41.874755 s : 28 8 16859136 18749.9 37499.8
Grid : Message : 44.529381 s : 28 8 16859136 19052.9 38105.8
Grid : Message : 44.742192 s : 28 8 16859136 237717.1 475434.2
Grid : Message : 46.184000 s : 32 8 25165824 57091.2 114182.4
Grid : Message : 50.734740 s : 32 8 25165824 19411.0 38821.9
Grid : Message : 53.931228 s : 32 8 25165824 19570.6 39141.2
Grid : Message : 54.238467 s : 32 8 25165824 245765.6 491531.2
Grid : Message : 54.268664 s : ====================================================================================================
Grid : Message : 54.268680 s : = All done; Bye Bye
Grid : Message : 54.268691 s : ====================================================================================================

systems/Summit/dwf.24.4node Normal file
View File

@ -0,0 +1,206 @@
OPENMPI detected
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device Number : 0
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device identifier: Tesla V100-SXM2-16GB
AcceleratorCudaInit[0]: totalGlobalMem: 16911433728
AcceleratorCudaInit[0]: managedMemory: 1
AcceleratorCudaInit[0]: isMultiGpuBoard: 0
AcceleratorCudaInit[0]: warpSize: 32
AcceleratorCudaInit[0]: pciBusID: 4
AcceleratorCudaInit[0]: pciDeviceID: 0
AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
AcceleratorCudaInit: rank 0 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 0 device 0 bus id: 0004:04:00.0
AcceleratorCudaInit: ================================================
SharedMemoryMpi: World communicator of size 24
SharedMemoryMpi: Node communicator of size 6
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x200080000000 for comms buffers
AcceleratorCudaInit: rank 3 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 3 device 3 bus id: 0035:03:00.0
AcceleratorCudaInit: rank 5 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 5 device 5 bus id: 0035:05:00.0
Setting up IPC
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|_ | | | | | | | | | | | | _|__
__|_ _|__
__|_ GGGG RRRR III DDDD _|__
__|_ G R R I D D _|__
__|_ G R R I D D _|__
__|_ G GG RRRR I D D _|__
__|_ G G R R I D D _|__
__|_ GGGG R R III DDDD _|__
__|_ _|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
| | | | | | | | | | | | | |
Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
AcceleratorCudaInit: rank 4 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 4 device 4 bus id: 0035:04:00.0
AcceleratorCudaInit: rank 1 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 1 device 1 bus id: 0004:05:00.0
AcceleratorCudaInit: rank 2 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 2 device 2 bus id: 0004:06:00.0
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Current Grid git commit hash=7cb1ff7395a5833ded6526c43891bd07a0436290: (HEAD -> develop, origin/develop, origin/HEAD) clean
Grid : Message : ================================================
Grid : Message : MPI is initialised and logging filters activated
Grid : Message : ================================================
Grid : Message : Requested 2147483648 byte stencil comms buffers
Grid : Message : MemoryManager Cache 8388608000 bytes
Grid : Message : MemoryManager::Init() setting up
Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
Grid : Message : MemoryManager::Init() Using cudaMalloc
Grid : Message : 1.731905 s : Grid Layout
Grid : Message : 1.731915 s : Global lattice size : 48 48 48 72
Grid : Message : 1.731928 s : OpenMP threads : 6
Grid : Message : 1.731938 s : MPI tasks : 2 2 2 3
AcceleratorCudaInit: rank 9 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 23 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 22 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 21 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 18 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 6 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 7 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 10 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 8 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 11 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 20 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 19 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 13 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 12 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 14 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 16 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 15 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 17 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
Grid : Message : 2.683494 s : Making s innermost grids
Grid : Message : 2.780034 s : Initialising 4d RNG
Grid : Message : 2.833099 s : Intialising parallel RNG with unique string 'The 4D RNG'
Grid : Message : 2.833121 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
Grid : Message : 2.916841 s : Initialising 5d RNG
Grid : Message : 3.762880 s : Intialising parallel RNG with unique string 'The 5D RNG'
Grid : Message : 3.762902 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
Grid : Message : 5.264345 s : Initialised RNGs
Grid : Message : 6.489904 s : Drawing gauge field
Grid : Message : 6.729262 s : Random gauge initialised
Grid : Message : 7.781273 s : Setting up Cshift based reference
Grid : Message : 8.725313 s : *****************************************************************
Grid : Message : 8.725332 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
Grid : Message : 8.725342 s : *****************************************************************
Grid : Message : 8.725352 s : *****************************************************************
Grid : Message : 8.725362 s : * Benchmarking DomainWallFermionR::Dhop
Grid : Message : 8.725372 s : * Vectorising space-time by 4
Grid : Message : 8.725383 s : * VComplexF size is 32 B
Grid : Message : 8.725395 s : * SINGLE precision
Grid : Message : 8.725405 s : * Using Overlapped Comms/Compute
Grid : Message : 8.725415 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 8.725425 s : *****************************************************************
Grid : Message : 9.465229 s : Called warmup
Grid : Message : 58.646066 s : Called Dw 3000 times in 4.91764e+07 us
Grid : Message : 58.646121 s : mflop/s = 1.02592e+07
Grid : Message : 58.646134 s : mflop/s per rank = 427468
Grid : Message : 58.646145 s : mflop/s per node = 2.56481e+06
Grid : Message : 58.646156 s : RF GiB/s (base 2) = 20846.5
Grid : Message : 58.646166 s : mem GiB/s (base 2) = 13029.1
Grid : Message : 58.648008 s : norm diff 1.04778e-13
Grid : Message : 58.734885 s : #### Dhop calls report
Grid : Message : 58.734897 s : WilsonFermion5D Number of DhopEO Calls : 6002
Grid : Message : 58.734909 s : WilsonFermion5D TotalTime /Calls : 8217.71 us
Grid : Message : 58.734922 s : WilsonFermion5D CommTime /Calls : 7109.5 us
Grid : Message : 58.734933 s : WilsonFermion5D FaceTime /Calls : 446.623 us
Grid : Message : 58.734943 s : WilsonFermion5D ComputeTime1/Calls : 18.0558 us
Grid : Message : 58.734953 s : WilsonFermion5D ComputeTime2/Calls : 731.097 us
Grid : Message : 58.734979 s : Average mflops/s per call : 4.8157e+09
Grid : Message : 58.734989 s : Average mflops/s per call per rank : 2.00654e+08
Grid : Message : 58.734999 s : Average mflops/s per call per node : 1.20393e+09
Grid : Message : 58.735008 s : Average mflops/s per call (full) : 1.04183e+07
Grid : Message : 58.735017 s : Average mflops/s per call per rank (full): 434094
Grid : Message : 58.735026 s : Average mflops/s per call per node (full): 2.60456e+06
Grid : Message : 58.735035 s : WilsonFermion5D Stencil
Grid : Message : 58.735043 s : WilsonFermion5D StencilEven
Grid : Message : 58.735051 s : WilsonFermion5D StencilOdd
Grid : Message : 58.735059 s : WilsonFermion5D Stencil Reporti()
Grid : Message : 58.735067 s : WilsonFermion5D StencilEven Reporti()
Grid : Message : 58.735075 s : WilsonFermion5D StencilOdd Reporti()
Grid : Message : 64.934380 s : Compare to naive wilson implementation Dag to verify correctness
Grid : Message : 64.934740 s : Called DwDag
Grid : Message : 64.934870 s : norm dag result 12.0422
Grid : Message : 64.120756 s : norm dag ref 12.0422
Grid : Message : 64.149389 s : norm dag diff 7.6644e-14
Grid : Message : 64.317786 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
Grid : Message : 64.465331 s : src_e0.499995
Grid : Message : 64.524653 s : src_o0.500005
Grid : Message : 64.558706 s : *********************************************************
Grid : Message : 64.558717 s : * Benchmarking DomainWallFermionF::DhopEO
Grid : Message : 64.558727 s : * Vectorising space-time by 4
Grid : Message : 64.558737 s : * SINGLE precision
Grid : Message : 64.558745 s : * Using Overlapped Comms/Compute
Grid : Message : 64.558753 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 64.558761 s : *********************************************************
Grid : Message : 92.702145 s : Deo mflop/s = 8.97692e+06
Grid : Message : 92.702185 s : Deo mflop/s per rank 374038
Grid : Message : 92.702198 s : Deo mflop/s per node 2.24423e+06
Grid : Message : 92.702209 s : #### Dhop calls report
Grid : Message : 92.702223 s : WilsonFermion5D Number of DhopEO Calls : 3001
Grid : Message : 92.702240 s : WilsonFermion5D TotalTime /Calls : 9377.88 us
Grid : Message : 92.702257 s : WilsonFermion5D CommTime /Calls : 8221.84 us
Grid : Message : 92.702277 s : WilsonFermion5D FaceTime /Calls : 543.548 us
Grid : Message : 92.702301 s : WilsonFermion5D ComputeTime1/Calls : 20.936 us
Grid : Message : 92.702322 s : WilsonFermion5D ComputeTime2/Calls : 732.33 us
Grid : Message : 92.702376 s : Average mflops/s per call : 4.13001e+09
Grid : Message : 92.702387 s : Average mflops/s per call per rank : 1.72084e+08
Grid : Message : 92.702397 s : Average mflops/s per call per node : 1.0325e+09
Grid : Message : 92.702407 s : Average mflops/s per call (full) : 9.12937e+06
Grid : Message : 92.702416 s : Average mflops/s per call per rank (full): 380391
Grid : Message : 92.702426 s : Average mflops/s per call per node (full): 2.28234e+06
Grid : Message : 92.702435 s : WilsonFermion5D Stencil
Grid : Message : 92.702443 s : WilsonFermion5D StencilEven
Grid : Message : 92.702451 s : WilsonFermion5D StencilOdd
Grid : Message : 92.702459 s : WilsonFermion5D Stencil Reporti()
Grid : Message : 92.702467 s : WilsonFermion5D StencilEven Reporti()
Grid : Message : 92.702475 s : WilsonFermion5D StencilOdd Reporti()
Grid : Message : 92.772983 s : r_e6.02121
Grid : Message : 92.786384 s : r_o6.02102
Grid : Message : 92.799622 s : res12.0422
Grid : Message : 93.860500 s : norm diff 0
Grid : Message : 93.162026 s : norm diff even 0
Grid : Message : 93.197529 s : norm diff odd 0

systems/Summit/dwf.32.4node Normal file
View File

@ -0,0 +1,206 @@
OPENMPI detected
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device Number : 0
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device identifier: Tesla V100-SXM2-16GB
AcceleratorCudaInit[0]: totalGlobalMem: 16911433728
AcceleratorCudaInit[0]: managedMemory: 1
AcceleratorCudaInit[0]: isMultiGpuBoard: 0
AcceleratorCudaInit[0]: warpSize: 32
AcceleratorCudaInit[0]: pciBusID: 4
AcceleratorCudaInit[0]: pciDeviceID: 0
AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
AcceleratorCudaInit: rank 0 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 0 device 0 bus id: 0004:04:00.0
AcceleratorCudaInit: ================================================
SharedMemoryMpi: World communicator of size 24
SharedMemoryMpi: Node communicator of size 6
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x200080000000 for comms buffers
Setting up IPC
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|_ | | | | | | | | | | | | _|__
__|_ _|__
__|_ GGGG RRRR III DDDD _|__
__|_ G R R I D D _|__
__|_ G R R I D D _|__
__|_ G GG RRRR I D D _|__
__|_ G G R R I D D _|__
__|_ GGGG R R III DDDD _|__
__|_ _|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
| | | | | | | | | | | | | |
Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
AcceleratorCudaInit: rank 2 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 2 device 2 bus id: 0004:06:00.0
AcceleratorCudaInit: rank 1 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 1 device 1 bus id: 0004:05:00.0
AcceleratorCudaInit: rank 4 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 4 device 4 bus id: 0035:04:00.0
AcceleratorCudaInit: rank 3 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 3 device 3 bus id: 0035:03:00.0
AcceleratorCudaInit: rank 5 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
local rank 5 device 5 bus id: 0035:05:00.0
GNU General Public License for more details.
Current Grid git commit hash=7cb1ff7395a5833ded6526c43891bd07a0436290: (HEAD -> develop, origin/develop, origin/HEAD) clean
Grid : Message : ================================================
Grid : Message : MPI is initialised and logging filters activated
Grid : Message : ================================================
Grid : Message : Requested 2147483648 byte stencil comms buffers
Grid : Message : MemoryManager Cache 8388608000 bytes
Grid : Message : MemoryManager::Init() setting up
Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
Grid : Message : MemoryManager::Init() Using cudaMalloc
Grid : Message : 1.544984 s : Grid Layout
Grid : Message : 1.544992 s : Global lattice size : 64 64 64 96
Grid : Message : 1.545003 s : OpenMP threads : 6
Grid : Message : 1.545011 s : MPI tasks : 2 2 2 3
AcceleratorCudaInit: rank 8 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 6 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 11 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 16 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 17 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 13 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 12 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 21 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 23 setting device to node rank 5
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 22 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 19 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 18 setting device to node rank 0
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 7 setting device to node rank 1
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 10 setting device to node rank 4
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 9 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 14 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 15 setting device to node rank 3
AcceleratorCudaInit: Configure options --enable-setdevice=yes
AcceleratorCudaInit: rank 20 setting device to node rank 2
AcceleratorCudaInit: Configure options --enable-setdevice=yes
Grid : Message : 2.994920 s : Making s innermost grids
Grid : Message : 2.232502 s : Initialising 4d RNG
Grid : Message : 2.397047 s : Intialising parallel RNG with unique string 'The 4D RNG'
Grid : Message : 2.397069 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
Grid : Message : 2.653140 s : Initialising 5d RNG
Grid : Message : 5.285347 s : Intialising parallel RNG with unique string 'The 5D RNG'
Grid : Message : 5.285369 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
Grid : Message : 9.994738 s : Initialised RNGs
Grid : Message : 13.153426 s : Drawing gauge field
Grid : Message : 13.825697 s : Random gauge initialised
Grid : Message : 18.537657 s : Setting up Cshift based reference
Grid : Message : 22.296755 s : *****************************************************************
Grid : Message : 22.296781 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
Grid : Message : 22.296791 s : *****************************************************************
Grid : Message : 22.296800 s : *****************************************************************
Grid : Message : 22.296809 s : * Benchmarking DomainWallFermionR::Dhop
Grid : Message : 22.296818 s : * Vectorising space-time by 4
Grid : Message : 22.296828 s : * VComplexF size is 32 B
Grid : Message : 22.296838 s : * SINGLE precision
Grid : Message : 22.296847 s : * Using Overlapped Comms/Compute
Grid : Message : 22.296855 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 22.296863 s : *****************************************************************
Grid : Message : 24.746452 s : Called warmup
Grid : Message : 137.525756 s : Called Dw 3000 times in 1.12779e+08 us
Grid : Message : 137.525818 s : mflop/s = 1.41383e+07
Grid : Message : 137.525831 s : mflop/s per rank = 589097
Grid : Message : 137.525843 s : mflop/s per node = 3.53458e+06
Grid : Message : 137.525854 s : RF GiB/s (base 2) = 28728.7
Grid : Message : 137.525864 s : mem GiB/s (base 2) = 17955.5
Grid : Message : 137.693645 s : norm diff 1.04885e-13
Grid : Message : 137.965585 s : #### Dhop calls report
Grid : Message : 137.965598 s : WilsonFermion5D Number of DhopEO Calls : 6002
Grid : Message : 137.965612 s : WilsonFermion5D TotalTime /Calls : 18899.7 us
Grid : Message : 137.965624 s : WilsonFermion5D CommTime /Calls : 16041.4 us
Grid : Message : 137.965634 s : WilsonFermion5D FaceTime /Calls : 859.705 us
Grid : Message : 137.965644 s : WilsonFermion5D ComputeTime1/Calls : 70.5881 us
Grid : Message : 137.965654 s : WilsonFermion5D ComputeTime2/Calls : 2094.8 us
Grid : Message : 137.965682 s : Average mflops/s per call : 3.87638e+09
Grid : Message : 137.965692 s : Average mflops/s per call per rank : 1.61516e+08
Grid : Message : 137.965702 s : Average mflops/s per call per node : 9.69095e+08
Grid : Message : 137.965712 s : Average mflops/s per call (full) : 1.43168e+07
Grid : Message : 137.965721 s : Average mflops/s per call per rank (full): 596533
Grid : Message : 137.965730 s : Average mflops/s per call per node (full): 3.5792e+06
Grid : Message : 137.965740 s : WilsonFermion5D Stencil
Grid : Message : 137.965748 s : WilsonFermion5D StencilEven
Grid : Message : 137.965756 s : WilsonFermion5D StencilOdd
Grid : Message : 137.965764 s : WilsonFermion5D Stencil Reporti()
Grid : Message : 137.965772 s : WilsonFermion5D StencilEven Reporti()
Grid : Message : 137.965780 s : WilsonFermion5D StencilOdd Reporti()
Grid : Message : 156.554605 s : Compare to naive wilson implementation Dag to verify correctness
Grid : Message : 156.554632 s : Called DwDag
Grid : Message : 156.554642 s : norm dag result 12.0421
Grid : Message : 156.639265 s : norm dag ref 12.0421
Grid : Message : 156.888281 s : norm dag diff 7.62057e-14
Grid : Message : 157.609797 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
Grid : Message : 158.208630 s : src_e0.499996
Grid : Message : 158.162447 s : src_o0.500004
Grid : Message : 158.267780 s : *********************************************************
Grid : Message : 158.267791 s : * Benchmarking DomainWallFermionF::DhopEO
Grid : Message : 158.267801 s : * Vectorising space-time by 4
Grid : Message : 158.267811 s : * SINGLE precision
Grid : Message : 158.267820 s : * Using Overlapped Comms/Compute
Grid : Message : 158.267828 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 158.267836 s : *********************************************************
Grid : Message : 216.487829 s : Deo mflop/s = 1.37283e+07
Grid : Message : 216.487869 s : Deo mflop/s per rank 572011
Grid : Message : 216.487881 s : Deo mflop/s per node 3.43206e+06
Grid : Message : 216.487893 s : #### Dhop calls report
Grid : Message : 216.487903 s : WilsonFermion5D Number of DhopEO Calls : 3001
Grid : Message : 216.487913 s : WilsonFermion5D TotalTime /Calls : 19399.6 us
Grid : Message : 216.487923 s : WilsonFermion5D CommTime /Calls : 16475.4 us
Grid : Message : 216.487933 s : WilsonFermion5D FaceTime /Calls : 972.393 us
Grid : Message : 216.487943 s : WilsonFermion5D ComputeTime1/Calls : 49.8474 us
Grid : Message : 216.487953 s : WilsonFermion5D ComputeTime2/Calls : 2089.93 us
Grid : Message : 216.488001 s : Average mflops/s per call : 5.39682e+09
Grid : Message : 216.488011 s : Average mflops/s per call per rank : 2.24867e+08
Grid : Message : 216.488020 s : Average mflops/s per call per node : 1.3492e+09
Grid : Message : 216.488030 s : Average mflops/s per call (full) : 1.39479e+07
Grid : Message : 216.488039 s : Average mflops/s per call per rank (full): 581162
Grid : Message : 216.488048 s : Average mflops/s per call per node (full): 3.48697e+06
Grid : Message : 216.488057 s : WilsonFermion5D Stencil
Grid : Message : 216.488065 s : WilsonFermion5D StencilEven
Grid : Message : 216.488073 s : WilsonFermion5D StencilOdd
Grid : Message : 216.488081 s : WilsonFermion5D Stencil Reporti()
Grid : Message : 216.488089 s : WilsonFermion5D StencilEven Reporti()
Grid : Message : 216.488097 s : WilsonFermion5D StencilOdd Reporti()
Grid : Message : 217.384495 s : r_e6.02113
Grid : Message : 217.426121 s : r_o6.02096
Grid : Message : 217.472636 s : res12.0421
Grid : Message : 218.200068 s : norm diff 0
Grid : Message : 218.645673 s : norm diff even 0
Grid : Message : 218.816561 s : norm diff odd 0

View File

@ -1,7 +1,7 @@
#!/bin/bash
#BSUB -P LGT104
#BSUB -W 2:00
-#BSUB -nnodes 4
+#BSUB -nnodes 16
#BSUB -J DWF
export OMP_NUM_THREADS=6
@ -9,14 +9,14 @@ export PAMI_IBV_ADAPTER_AFFINITY=1
export PAMI_ENABLE_STRIPING=1
export OPT="--comms-concurrent --comms-overlap "
-APP="./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.3 "
+APP="./benchmarks/Benchmark_comms_host_device --mpi 4.4.4.3 "
-jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP
+jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.16node.log
-APP="./benchmarks/Benchmark_dwf_fp32 --grid 48.48.48.72 --mpi 2.2.2.3 --shm 1024 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
+APP="./benchmarks/Benchmark_dwf_fp32 --grid 96.96.96.72 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
-jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP
+jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.24.log
-APP="./benchmarks/Benchmark_dwf_fp32 --grid 64.64.64.96 --mpi 2.2.2.3 --shm 1024 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
+APP="./benchmarks/Benchmark_dwf_fp32 --grid 128.128.128.96 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
-jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP
+jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.32.log

View File

@ -10,13 +10,13 @@ export PAMI_ENABLE_STRIPING=1
export OPT="--comms-concurrent --comms-overlap "
#export GRID_ALLOC_NCACHE_LARGE=1
export APP="./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.3 "
-jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP
+jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.4node
-APP="./benchmarks/Benchmark_dwf_fp32 --grid 48.48.48.72 --mpi 2.2.2.3 --shm 1024 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
+APP="./benchmarks/Benchmark_dwf_fp32 --grid 48.48.48.72 --mpi 2.2.2.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
-jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP
+jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.24.4node
-APP="./benchmarks/Benchmark_dwf_fp32 --grid 64.64.64.96 --mpi 2.2.2.3 --shm 1024 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
+APP="./benchmarks/Benchmark_dwf_fp32 --grid 64.64.64.96 --mpi 2.2.2.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
-jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP
+jsrun --nrs 4 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.32.4node

View File

@ -5,7 +5,7 @@
--enable-gen-simd-width=64 \
--enable-accelerator=cuda \
--with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
---disable-accelerator-cshift \
+--enable-accelerator-cshift \
--disable-unified \
CXX=nvcc \
LDFLAGS="-cudart shared " \

View File

@ -1,2 +1,6 @@
-spack load c-lime
-module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
+module load cuda/11.4.1 openmpi/4.1.1-cuda11.4.1 ucx/1.12.0-cuda11.4.1
+#module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
+export PREFIX=/home/tc002/tc002/shared/env/prefix/
+export LD_LIBRARY_PATH=$PREFIX/lib/:$LD_LIBRARY_PATH
+unset SBATCH_EXPORT

View File

@ -235,7 +235,6 @@ void TestWhat(What & Ddwf,
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
-RealD t1,t2;
SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -215,7 +215,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi);
-RealD t1,t2;
SchurDiagMooeeOperator<DomainWallEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e, dchi_e);

View File

@ -212,8 +212,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
-RealD t1,t2;
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -181,8 +181,8 @@ void checkAdj(const Gamma::Algebra a)
void checkProject(GridSerialRNG &rng)
{
-SpinVector rv, recon, full;
+SpinVector rv, recon;
-HalfSpinVector hsp, hsm;
+HalfSpinVector hsm;
random(rng, rv);

View File

@ -198,7 +198,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
-RealD t1,t2;
SchurDiagMooeeOperator<GparityWilsonFermionR,FermionField> HermOpEO(Dw);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -364,14 +364,12 @@ int main(int argc, char **argv) {
{ // Peek-ology and Poke-ology, with a little app-ology
Complex c;
-ColourMatrix c_m;
+ColourMatrix c_m = Zero();
-SpinMatrix s_m;
+SpinMatrix s_m = Zero();
-SpinColourMatrix sc_m;
+SpinColourMatrix sc_m = Zero();
-s_m = TensorIndexRecursion<ColourIndex>::traceIndex(
-    sc_m); // Map to traceColour
+s_m = TensorIndexRecursion<ColourIndex>::traceIndex(sc_m); // Map to traceColour
-c_m = TensorIndexRecursion<SpinIndex>::traceIndex(
-    sc_m); // map to traceSpin
+c_m = TensorIndexRecursion<SpinIndex>::traceIndex(sc_m); // map to traceSpin
c = TensorIndexRecursion<SpinIndex>::traceIndex(s_m);
c = TensorIndexRecursion<ColourIndex>::traceIndex(c_m);

View File

@ -217,7 +217,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd , chi_o, chi); pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi); pickCheckerboard(Odd , phi_o, phi);
RealD t1,t2;
SchurDiagMooeeOperator<MobiusEOFAFermionR,LatticeFermion> HermOpEO(Ddwf); SchurDiagMooeeOperator<MobiusEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e, dchi_e); HermOpEO.MpcDagMpc(chi_e, dchi_e);

View File

@ -262,7 +262,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi); pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi); pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<MobiusFermionR,LatticeFermion> HermOpEO(Ddwf); SchurDiagMooeeOperator<MobiusFermionR,LatticeFermion> HermOpEO(Ddwf);

View File

@ -144,7 +144,7 @@ int main (int argc, char ** argv)
Ds.Dhop(src,result,0); Ds.Dhop(src,result,0);
} }
double t1=usecond(); double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146 double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl; std::cout<<GridLogMessage << "Called Ds"<<std::endl;
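
The flop count used in these staggered Dhop timings is compact. One hedged reading of the arithmetic behind the 1146 flops-per-site figure, and of how the rate is then formed (t0 is assumed to be the usecond() stamp taken before the ncall loop, as elsewhere in these benchmarks):

  // 16 SU(3) matrix-vector products per site, 3 rows each costing 6+8+8 flops:
  //   16 * (3*(6+8+8)) = 16 * 66 = 1056
  // plus 15 complex 3-vector accumulations: 15*3*2 = 90, so 1056 + 90 = 1146
  double flops  = (16*(3*(6+8+8)) + 15*3*2) * volume * ncall;
  double mflops = flops / (t1 - t0);   // t1-t0 is in microseconds, so this is Mflop/s
  std::cout << GridLogMessage << "mflop/s = " << mflops << std::endl;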

View File

@ -162,7 +162,6 @@ int main (int argc, char ** argv)
} }
double t1=usecond(); double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146 double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl; std::cout<<GridLogMessage << "Called Ds"<<std::endl;

View File

@ -30,7 +30,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
@ -135,7 +134,6 @@ int main (int argc, char ** argv)
Ds.Dhop(src,result,0); Ds.Dhop(src,result,0);
} }
double t1=usecond(); double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146 double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl; std::cout<<GridLogMessage << "Called Ds"<<std::endl;

View File

@ -204,7 +204,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi); pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi); pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw); SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -205,7 +205,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi); pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi); pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<WilsonTMFermionR,LatticeFermion> HermOpEO(Dw); SchurDiagMooeeOperator<WilsonTMFermionR,LatticeFermion> HermOpEO(Dw);
HermOpEO.MpcDagMpc(chi_e,dchi_e); HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -276,7 +276,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi); pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi); pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi); pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<ZMobiusFermionR,LatticeFermion> HermOpEO(Ddwf); SchurDiagMooeeOperator<ZMobiusFermionR,LatticeFermion> HermOpEO(Ddwf);

View File

@ -57,7 +57,6 @@ int main (int argc, char ** argv)
SU<Nc>::HotConfiguration(pRNG,U); SU<Nc>::HotConfiguration(pRNG,U);
double beta = 1.0; double beta = 1.0;
double c1 = -0.331;
IwasakiGaugeActionR Action(beta); IwasakiGaugeActionR Action(beta);
// PlaqPlusRectangleActionR Action(beta,c1); // PlaqPlusRectangleActionR Action(beta,c1);
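
Dropping the local c1 is safe here because the Iwasaki constructor carries its own rectangle coefficient; the commented-out line shows the generic form it specialises. A minimal sketch of the two constructions, assuming (as the removed line suggested) c1 = -0.331 for the Iwasaki choice:

  double beta = 1.0;
  IwasakiGaugeActionR      ActionIwasaki(beta);           // rectangle coefficient fixed internally
  PlaqPlusRectangleActionR ActionGeneric(beta, -0.331);   // the generic plaquette+rectangle form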

View File

@ -40,6 +40,7 @@ using namespace Grid;
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public: public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector; typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField; typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
@ -67,6 +68,8 @@ public:
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public: public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator ();
typedef iVector<CComplex,nbasis > CoarseSiteVector; typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField; typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
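
The `using LinearFunction<...>::operator();` lines added here, and in the smoother and multigrid classes below, all address the same C++ rule: declaring any operator() in a derived class hides every base-class overload of that name, so a class that overrides only the single-field form would lose any additional overloads the base now declares. A minimal self-contained sketch of the problem and the fix, with hypothetical Base/Derived names rather than the Grid classes:

  #include <vector>

  struct Base {
    virtual void operator()(int x) = 0;                    // the overload everyone overrides
    virtual void operator()(const std::vector<int> &xs) {  // an overload added later
      for (int x : xs) (*this)(x);
    }
    virtual ~Base() = default;
  };

  struct Derived : Base {
    using Base::operator();                 // re-expose the hidden overload set
    void operator()(int) override {}        // only overrides the single-argument form
  };

  int main() {
    Derived d;
    d(std::vector<int>{1, 2, 3});           // compiles only because of the using-declaration
    return 0;
  }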

View File

@ -55,6 +55,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -78,6 +79,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field> template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix; Matrix & SmootherMatrix;
FineOperator & SmootherOperator; FineOperator & SmootherOperator;
@ -108,6 +110,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > { class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -56,9 +56,9 @@ template<class Field> class SolverWrapper : public LinearFunction<Field> {
private: private:
CheckerBoardedSparseMatrixBase<Field> & _Matrix; CheckerBoardedSparseMatrixBase<Field> & _Matrix;
SchurRedBlackBase<Field> & _Solver; SchurRedBlackBase<Field> & _Solver;
public: public:
using LinearFunction<Field>::operator();
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Wrap the usual normal equations trick // Wrap the usual normal equations trick
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
SolverWrapper(CheckerBoardedSparseMatrixBase<Field> &Matrix, SolverWrapper(CheckerBoardedSparseMatrixBase<Field> &Matrix,
@ -75,6 +75,7 @@ public:
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -98,6 +99,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field> template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix; Matrix & SmootherMatrix;
FineOperator & SmootherOperator; FineOperator & SmootherOperator;
@ -128,6 +130,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > { class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
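
The "wrap the usual normal equations trick" comment recurring in these wrapper classes refers to making a general matrix solvable by conjugate gradient: CG wants a Hermitian positive-definite operator, so one solves Mdag M x = Mdag b instead of M x = b. A hedged sketch of such a wrapper in the style of these files; the Grid-like names (SparseMatrixBase, OperatorFunction, MdagMLinearOperator) and their signatures should be treated as assumptions, not the exact classes used here:

  template<class Field> class NormalEquationsSketch : public LinearFunction<Field> {
    SparseMatrixBase<Field> &_Matrix;      // supplies M and Mdag
    OperatorFunction<Field> &_HermSolver;  // e.g. ConjugateGradient, applied to MdagM
  public:
    using LinearFunction<Field>::operator();
    NormalEquationsSketch(SparseMatrixBase<Field> &M, OperatorFunction<Field> &S)
      : _Matrix(M), _HermSolver(S) {}
    void operator()(const Field &b, Field &x) {
      Field src(b.Grid());
      _Matrix.Mdag(b, src);                                             // src = Mdag b
      MdagMLinearOperator<SparseMatrixBase<Field>, Field> MdagM(_Matrix);
      _HermSolver(MdagM, src, x);                                       // solve (Mdag M) x = Mdag b
    }
  };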

View File

@ -55,6 +55,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -78,6 +79,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field> template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix; Matrix & SmootherMatrix;
FineOperator & SmootherOperator; FineOperator & SmootherOperator;
@ -108,6 +110,8 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > { class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -56,6 +56,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -79,6 +80,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field> template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix; Matrix & SmootherMatrix;
FineOperator & SmootherOperator; FineOperator & SmootherOperator;
@ -108,6 +110,7 @@ public:
template<class Field,class Matrix> class RedBlackSmoother : public LinearFunction<Field> template<class Field,class Matrix> class RedBlackSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix; Matrix & SmootherMatrix;
RealD tol; RealD tol;
@ -134,6 +137,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > { class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
@ -241,7 +245,7 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
const int Ls=16; const int Ls=16;
const int rLs=8; // const int rLs=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
@ -388,7 +392,7 @@ int main (int argc, char ** argv)
// RedBlackSmoother<LatticeFermion,DomainWallFermionR> FineRBSmoother(0.00,0.001,100,Ddwf); // RedBlackSmoother<LatticeFermion,DomainWallFermionR> FineRBSmoother(0.00,0.001,100,Ddwf);
// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space // Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
ZeroGuesser<CoarseVector> CoarseZeroGuesser; // ZeroGuesser<CoarseVector> CoarseZeroGuesser;
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp, TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
HermIndefOp,Ddwf, HermIndefOp,Ddwf,
FineSmoother, FineSmoother,

View File

@ -57,7 +57,7 @@ private:
CheckerBoardedSparseMatrixBase<Field> & _Matrix; CheckerBoardedSparseMatrixBase<Field> & _Matrix;
SchurRedBlackBase<Field> & _Solver; SchurRedBlackBase<Field> & _Solver;
public: public:
using LinearFunction<Field>::operator();
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Wrap the usual normal equations trick // Wrap the usual normal equations trick
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
@ -75,6 +75,7 @@ public:
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -98,6 +99,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field> template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix; Matrix & SmootherMatrix;
FineOperator & SmootherOperator; FineOperator & SmootherOperator;
@ -128,6 +130,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > { class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -55,6 +55,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -78,6 +79,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field> template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix; Matrix & SmootherMatrix;
FineOperator & SmootherOperator; FineOperator & SmootherOperator;
@ -108,6 +110,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > { class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -57,6 +57,7 @@ private:
OperatorFunction<Field> & _Solver; OperatorFunction<Field> & _Solver;
LinearFunction<Field> & _Guess; LinearFunction<Field> & _Guess;
public: public:
using LinearFunction<Field>::operator();
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Wrap the usual normal equations trick // Wrap the usual normal equations trick
@ -118,6 +119,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -174,6 +176,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > { class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -456,8 +456,8 @@ public:
siteVector *CBp=Stencil.CommBuf(); siteVector *CBp=Stencil.CommBuf();
int ptype; // int ptype;
int nb2=nbasis/2; // int nb2=nbasis/2;
autoView(in_v , in, AcceleratorRead); autoView(in_v , in, AcceleratorRead);
autoView(st, Stencil, AcceleratorRead); autoView(st, Stencil, AcceleratorRead);
@ -471,7 +471,7 @@ public:
typedef decltype(coalescedRead(in_v[0])) calcVector; typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex; typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls; int sU = sF/Ls;
int s = sF%Ls; // int s = sF%Ls;
calcComplex res = Zero(); calcComplex res = Zero();
calcVector nbr; calcVector nbr;
@ -517,14 +517,14 @@ public:
autoView(st, Stencil, AcceleratorRead); autoView(st, Stencil, AcceleratorRead);
siteVector *CBp=Stencil.CommBuf(); siteVector *CBp=Stencil.CommBuf();
int ptype; // int ptype;
int nb2=nbasis/2; // int nb2=nbasis/2;
accelerator_for2d(sF, Coarse5D->oSites(), b, nbasis, Nsimd, { accelerator_for2d(sF, Coarse5D->oSites(), b, nbasis, Nsimd, {
typedef decltype(coalescedRead(in_v[0])) calcVector; typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex; typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls; int sU = sF/Ls;
int s = sF%Ls; // int s = sF%Ls;
calcComplex res = Zero(); calcComplex res = Zero();
@ -650,7 +650,7 @@ private:
OperatorFunction<Field> & _Solver; OperatorFunction<Field> & _Solver;
LinearFunction<Field> & _Guess; LinearFunction<Field> & _Guess;
public: public:
using LinearFunction<Field>::operator();
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Wrap the usual normal equations trick // Wrap the usual normal equations trick
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
@ -712,6 +712,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -735,6 +736,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class MGPreconditioner : public LinearFunction< Lattice<Fobj> > { class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector; typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
@ -831,6 +833,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > { class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector; typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
@ -1174,18 +1177,18 @@ int main (int argc, char ** argv)
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2); PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20); ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
int cNconv;
cNm=0; cNm=0;
std::vector<RealD> eval2(cNm); std::vector<RealD> eval2(cNm);
std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d); std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
cc_src=1.0; cc_src=1.0;
// int cNconv;
// IRLL2.calc(eval2,evec2,cc_src,cNconv); // IRLL2.calc(eval2,evec2,cc_src,cNconv);
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(0.02,10000); ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(0.02,10000);
DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2); DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2);
NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(cc_Dwf,CoarseCoarseCG,DeflCoarseCoarseGuesser); NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(cc_Dwf,CoarseCoarseCG,DeflCoarseCoarseGuesser);
ZeroGuesser<CoarseVector> CoarseZeroGuesser; // ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ZeroGuesser<CoarseCoarseVector> CoarseCoarseZeroGuesser; ZeroGuesser<CoarseCoarseVector> CoarseCoarseZeroGuesser;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
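
The guesser plumbing kept above (DeflatedGuesser feeding NormalEquations) supplies the solver's starting vector from the stored low modes. A hedged sketch of what that initial guess is, assuming eigenpairs (eval[i], evec[i]) of the Hermitian operator; this illustrates the idea only, not the Grid implementation:

  // guess = sum_i  evec[i] * <evec[i], src> / eval[i]
  template<class Field>
  void DeflatedGuessSketch(const std::vector<Field> &evec,
                           const std::vector<RealD> &eval,
                           const Field &src, Field &guess) {
    guess = Zero();
    for (size_t i = 0; i < evec.size(); i++) {
      ComplexD dot = innerProduct(evec[i], src);   // <v_i, b>
      axpy(guess, dot / eval[i], evec[i], guess);  // guess += (dot/eval_i) * evec_i
    }
  }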

View File

@ -456,8 +456,8 @@ public:
siteVector *CBp=Stencil.CommBuf(); siteVector *CBp=Stencil.CommBuf();
int ptype; //int ptype;
int nb2=nbasis/2; // int nb2=nbasis/2;
autoView(in_v , in, AcceleratorRead); autoView(in_v , in, AcceleratorRead);
autoView(st, Stencil, AcceleratorRead); autoView(st, Stencil, AcceleratorRead);
@ -471,7 +471,7 @@ public:
typedef decltype(coalescedRead(in_v[0])) calcVector; typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex; typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls; int sU = sF/Ls;
int s = sF%Ls; // int s = sF%Ls;
calcComplex res = Zero(); calcComplex res = Zero();
calcVector nbr; calcVector nbr;
@ -517,14 +517,14 @@ public:
autoView(st, Stencil, AcceleratorRead); autoView(st, Stencil, AcceleratorRead);
siteVector *CBp=Stencil.CommBuf(); siteVector *CBp=Stencil.CommBuf();
int ptype; // int ptype;
int nb2=nbasis/2; // int nb2=nbasis/2;
accelerator_for2d(sF, Coarse5D->oSites(), b, nbasis, Nsimd, { accelerator_for2d(sF, Coarse5D->oSites(), b, nbasis, Nsimd, {
typedef decltype(coalescedRead(in_v[0])) calcVector; typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex; typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls; int sU = sF/Ls;
int s = sF%Ls; // int s = sF%Ls;
calcComplex res = Zero(); calcComplex res = Zero();
@ -648,7 +648,7 @@ private:
CheckerBoardedSparseMatrixBase<Field> & _Matrix; CheckerBoardedSparseMatrixBase<Field> & _Matrix;
SchurRedBlackBase<Field> & _Solver; SchurRedBlackBase<Field> & _Solver;
public: public:
using LinearFunction<Field>::operator();
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Wrap the usual normal equations trick // Wrap the usual normal equations trick
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
@ -669,6 +669,7 @@ private:
OperatorFunction<Field> & _Solver; OperatorFunction<Field> & _Solver;
LinearFunction<Field> & _Guess; LinearFunction<Field> & _Guess;
public: public:
using LinearFunction<Field>::operator();
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Wrap the usual normal equations trick // Wrap the usual normal equations trick
@ -731,6 +732,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator; typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix; Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator; FineOperator & _SmootherOperator;
@ -754,6 +756,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class MGPreconditioner : public LinearFunction< Lattice<Fobj> > { class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector; typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
@ -850,7 +853,8 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver> template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > { class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector; typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix; typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
@ -1194,11 +1198,11 @@ int main (int argc, char ** argv)
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2); PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20); ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
int cNconv;
cNm=0; cNm=0;
std::vector<RealD> eval2(cNm); std::vector<RealD> eval2(cNm);
std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d); std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
cc_src=1.0; cc_src=1.0;
// int cNconv;
// IRLL2.calc(eval2,evec2,cc_src,cNconv); // IRLL2.calc(eval2,evec2,cc_src,cNconv);
std::vector<RealD> tols ({0.005,0.001}); std::vector<RealD> tols ({0.005,0.001});
@ -1218,10 +1222,10 @@ int main (int argc, char ** argv)
for(auto c_hi : c_his ) { for(auto c_hi : c_his ) {
for(auto f_lo : f_los ) { for(auto f_lo : f_los ) {
for(auto f_hi : f_his ) { for(auto f_hi : f_his ) {
ZeroGuesser<CoarseVector> CoarseZeroGuesser; // ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ZeroGuesser<CoarseCoarseVector> CoarseCoarseZeroGuesser; // ZeroGuesser<CoarseCoarseVector> CoarseCoarseZeroGuesser;
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(tol,10000); ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(tol,10000);
ZeroGuesser<CoarseCoarseVector> CoarseCoarseGuesser; // ZeroGuesser<CoarseCoarseVector> CoarseCoarseGuesser;
SchurRedBlackDiagMooeeSolve<CoarseCoarseVector> CoarseCoarseRBCG(CoarseCoarseCG); SchurRedBlackDiagMooeeSolve<CoarseCoarseVector> CoarseCoarseRBCG(CoarseCoarseCG);
SchurSolverWrapper<CoarseCoarseVector> CoarseCoarseSolver(cc_Dwf,CoarseCoarseRBCG); SchurSolverWrapper<CoarseCoarseVector> CoarseCoarseSolver(cc_Dwf,CoarseCoarseRBCG);

View File

@ -143,6 +143,7 @@ public:
template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> { template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
virtual ~MultiGridPreconditionerBase() = default; virtual ~MultiGridPreconditionerBase() = default;
virtual void setup() = 0; virtual void setup() = 0;
virtual void operator()(Field const &in, Field &out) = 0; virtual void operator()(Field const &in, Field &out) = 0;
@ -156,6 +157,7 @@ public:
///////////////////////////////////////////// /////////////////////////////////////////////
// Type Definitions // Type Definitions
///////////////////////////////////////////// /////////////////////////////////////////////
using MultiGridPreconditionerBase<Lattice<Fobj>>::operator();
// clang-format off // clang-format off
typedef Aggregation<Fobj, CComplex, nBasis> Aggregates; typedef Aggregation<Fobj, CComplex, nBasis> Aggregates;
@ -568,6 +570,7 @@ public:
///////////////////////////////////////////// /////////////////////////////////////////////
// Type Definitions // Type Definitions
///////////////////////////////////////////// /////////////////////////////////////////////
using MultiGridPreconditionerBase<Lattice<Fobj>>::operator();
typedef Matrix FineDiracMatrix; typedef Matrix FineDiracMatrix;
typedef Lattice<Fobj> FineVector; typedef Lattice<Fobj> FineVector;

View File

@ -56,7 +56,6 @@ int main (int argc, char ** argv)
QuasiMinimalResidual<LatticeFermion> QMR(1.0e-8,10000); QuasiMinimalResidual<LatticeFermion> QMR(1.0e-8,10000);
RealD mass=0.0; RealD mass=0.0;
RealD M5=1.8;
WilsonFermionR Dw(Umu,*Grid,*rbGrid,mass); WilsonFermionR Dw(Umu,*Grid,*rbGrid,mass);
NonHermitianLinearOperator<WilsonFermionR,LatticeFermion> NonHermOp(Dw); NonHermitianLinearOperator<WilsonFermionR,LatticeFermion> NonHermOp(Dw);