1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Merge branch 'develop' into develop

This commit is contained in:
Peter Boyle 2021-10-27 20:26:57 -04:00 committed by GitHub
commit 195ab2888d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
190 changed files with 5851 additions and 1416 deletions

1
.gitignore vendored
View File

@ -88,6 +88,7 @@ Thumbs.db
# build directory # # build directory #
################### ###################
build*/* build*/*
Documentation/_build
# IDE related files # # IDE related files #
##################### #####################

View File

@ -358,7 +358,7 @@ public:
autoView( in_v , in, AcceleratorRead); autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite); autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead); autoView( Stencil_v , Stencil, AcceleratorRead);
auto& geom_v = geom; int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview; typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer; Vector<Aview> AcceleratorViewContainer;
@ -380,7 +380,7 @@ public:
int ptype; int ptype;
StencilEntry *SE; StencilEntry *SE;
for(int point=0;point<geom_v.npoint;point++){ for(int point=0;point<npoint;point++){
SE=Stencil_v.GetEntry(ptype,point,ss); SE=Stencil_v.GetEntry(ptype,point,ss);
@ -424,7 +424,7 @@ public:
autoView( in_v , in, AcceleratorRead); autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite); autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead); autoView( Stencil_v , Stencil, AcceleratorRead);
auto& geom_v = geom; int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview; typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer; Vector<Aview> AcceleratorViewContainer;
@ -442,6 +442,8 @@ public:
for(int p=0; p<geom.npoint; p++) for(int p=0; p<geom.npoint; p++)
points[p] = geom.points_dagger[p]; points[p] = geom.points_dagger[p];
auto points_p = &points[0];
RealD* dag_factor_p = &dag_factor[0]; RealD* dag_factor_p = &dag_factor[0];
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
@ -452,8 +454,8 @@ public:
int ptype; int ptype;
StencilEntry *SE; StencilEntry *SE;
for(int p=0;p<geom_v.npoint;p++){ for(int p=0;p<npoint;p++){
int point = points[p]; int point = points_p[p];
SE=Stencil_v.GetEntry(ptype,point,ss); SE=Stencil_v.GetEntry(ptype,point,ss);
@ -708,6 +710,8 @@ public:
for(int p=0; p<npoint; p++) for(int p=0; p<npoint; p++)
points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p;
auto points_p = &points[0];
Vector<Aview> AcceleratorViewContainer; Vector<Aview> AcceleratorViewContainer;
for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead));
Aview *Aview_p = & AcceleratorViewContainer[0]; Aview *Aview_p = & AcceleratorViewContainer[0];
@ -728,7 +732,7 @@ public:
StencilEntry *SE; StencilEntry *SE;
for(int p=0;p<npoint;p++){ for(int p=0;p<npoint;p++){
int point = points[p]; int point = points_p[p];
SE=st_v.GetEntry(ptype,point,ss); SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) { if(SE->_is_local) {
@ -754,7 +758,7 @@ public:
StencilEntry *SE; StencilEntry *SE;
for(int p=0;p<npoint;p++){ for(int p=0;p<npoint;p++){
int point = points[p]; int point = points_p[p];
SE=st_v.GetEntry(ptype,point,ss); SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) { if(SE->_is_local) {

View File

@ -136,7 +136,7 @@ public:
flops=0; flops=0;
usec =0; usec =0;
Coordinate layout(Nd,1); Coordinate layout(Nd,1);
sgrid = new GridCartesian(dimensions,layout,processors); sgrid = new GridCartesian(dimensions,layout,processors,*grid);
}; };
~FFT ( void) { ~FFT ( void) {
@ -182,7 +182,7 @@ public:
pencil_gd[dim] = G*processors[dim]; pencil_gd[dim] = G*processors[dim];
// Pencil global vol LxLxGxLxL per node // Pencil global vol LxLxGxLxL per node
GridCartesian pencil_g(pencil_gd,layout,processors); GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid);
// Construct pencils // Construct pencils
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;

View File

@ -52,6 +52,7 @@ public:
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
virtual void HermOp(const Field &in, Field &out)=0; virtual void HermOp(const Field &in, Field &out)=0;
virtual ~LinearOperatorBase(){};
}; };
@ -507,7 +508,7 @@ class SchurStaggeredOperator : public SchurOperatorBase<Field> {
virtual void MpcDag (const Field &in, Field &out){ virtual void MpcDag (const Field &in, Field &out){
Mpc(in,out); Mpc(in,out);
} }
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { virtual void MpcDagMpc(const Field &in, Field &out) {
assert(0);// Never need with staggered assert(0);// Never need with staggered
} }
}; };
@ -530,6 +531,16 @@ public:
template<class Field> class LinearFunction { template<class Field> class LinearFunction {
public: public:
virtual void operator() (const Field &in, Field &out) = 0; virtual void operator() (const Field &in, Field &out) = 0;
virtual void operator() (const std::vector<Field> &in, std::vector<Field> &out)
{
assert(in.size() == out.size());
for (unsigned int i = 0; i < in.size(); ++i)
{
(*this)(in[i], out[i]);
}
}
}; };
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
@ -575,6 +586,7 @@ class HermOpOperatorFunction : public OperatorFunction<Field> {
template<typename Field> template<typename Field>
class PlainHermOp : public LinearFunction<Field> { class PlainHermOp : public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
LinearOperatorBase<Field> &_Linop; LinearOperatorBase<Field> &_Linop;
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop) PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
@ -588,6 +600,7 @@ public:
template<typename Field> template<typename Field>
class FunctionHermOp : public LinearFunction<Field> { class FunctionHermOp : public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
OperatorFunction<Field> & _poly; OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop; LinearOperatorBase<Field> &_Linop;

View File

@ -30,13 +30,19 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class Field> class Preconditioner : public LinearFunction<Field> { template<class Field> using Preconditioner = LinearFunction<Field> ;
/*
template<class Field> class Preconditioner : public LinearFunction<Field> {
using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field & psi)=0; virtual void operator()(const Field &src, Field & psi)=0;
}; };
*/
template<class Field> class TrivialPrecon : public Preconditioner<Field> { template<class Field> class TrivialPrecon : public Preconditioner<Field> {
public: public:
void operator()(const Field &src, Field & psi){ using Preconditioner<Field>::operator();
virtual void operator()(const Field &src, Field & psi){
psi = src; psi = src;
} }
TrivialPrecon(void){}; TrivialPrecon(void){};

View File

@ -48,6 +48,7 @@ public:
virtual void Mdiag (const Field &in, Field &out)=0; virtual void Mdiag (const Field &in, Field &out)=0;
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0; virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
virtual ~SparseMatrixBase() {};
}; };
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
@ -72,7 +73,7 @@ public:
virtual void MeooeDag (const Field &in, Field &out)=0; virtual void MeooeDag (const Field &in, Field &out)=0;
virtual void MooeeDag (const Field &in, Field &out)=0; virtual void MooeeDag (const Field &in, Field &out)=0;
virtual void MooeeInvDag (const Field &in, Field &out)=0; virtual void MooeeInvDag (const Field &in, Field &out)=0;
virtual ~CheckerBoardedSparseMatrixBase() {};
}; };
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -36,7 +36,8 @@ NAMESPACE_BEGIN(Grid);
template<class FieldD, class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> template<class FieldD, class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionBiCGSTAB : public LinearFunction<FieldD> class MixedPrecisionBiCGSTAB : public LinearFunction<FieldD>
{ {
public: public:
using LinearFunction<FieldD>::operator();
RealD Tolerance; RealD Tolerance;
RealD InnerTolerance; // Initial tolerance for inner CG. Defaults to Tolerance but can be changed RealD InnerTolerance; // Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations; Integer MaxInnerIterations;

View File

@ -35,7 +35,8 @@ NAMESPACE_BEGIN(Grid);
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> { class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
public: public:
using LinearFunction<FieldD>::operator();
RealD Tolerance; RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations; Integer MaxInnerIterations;

View File

@ -33,16 +33,19 @@ namespace Grid {
template<class Field> template<class Field>
class ZeroGuesser: public LinearFunction<Field> { class ZeroGuesser: public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field &guess) { guess = Zero(); }; virtual void operator()(const Field &src, Field &guess) { guess = Zero(); };
}; };
template<class Field> template<class Field>
class DoNothingGuesser: public LinearFunction<Field> { class DoNothingGuesser: public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field &guess) { }; virtual void operator()(const Field &src, Field &guess) { };
}; };
template<class Field> template<class Field>
class SourceGuesser: public LinearFunction<Field> { class SourceGuesser: public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field &guess) { guess = src; }; virtual void operator()(const Field &src, Field &guess) { guess = src; };
}; };
@ -54,15 +57,24 @@ class DeflatedGuesser: public LinearFunction<Field> {
private: private:
const std::vector<Field> &evec; const std::vector<Field> &evec;
const std::vector<RealD> &eval; const std::vector<RealD> &eval;
const unsigned int N;
public: public:
using LinearFunction<Field>::operator();
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval)
: DeflatedGuesser(_evec, _eval, _evec.size())
{}
DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N)
: evec(_evec), eval(_eval), N(_N)
{
assert(evec.size()==eval.size());
assert(N <= evec.size());
}
virtual void operator()(const Field &src,Field &guess) { virtual void operator()(const Field &src,Field &guess) {
guess = Zero(); guess = Zero();
assert(evec.size()==eval.size());
auto N = evec.size();
for (int i=0;i<N;i++) { for (int i=0;i<N;i++) {
const Field& tmp = evec[i]; const Field& tmp = evec[i];
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
@ -79,6 +91,7 @@ private:
const std::vector<RealD> &eval_coarse; const std::vector<RealD> &eval_coarse;
public: public:
using LinearFunction<FineField>::operator();
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace, LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
const std::vector<CoarseField> &_evec_coarse, const std::vector<CoarseField> &_evec_coarse,
const std::vector<RealD> &_eval_coarse) const std::vector<RealD> &_eval_coarse)

View File

@ -67,6 +67,7 @@ public:
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public: public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector; typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField; typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
@ -97,6 +98,7 @@ public:
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public: public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector; typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField; typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field

View File

@ -43,7 +43,7 @@ NAMESPACE_BEGIN(Grid);
template<class Field> template<class Field>
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> { class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
RealD Tolerance; RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
int verbose; int verbose;

View File

@ -43,7 +43,7 @@ NAMESPACE_BEGIN(Grid);
template<class Field> template<class Field>
class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> { class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> {
public: public:
using LinearFunction<Field>::operator();
RealD Tolerance; RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
int verbose; int verbose;
@ -119,7 +119,8 @@ public:
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){ RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
RealD cp; RealD cp;
ComplexD a, b, zAz; ComplexD a, b;
// ComplexD zAz;
RealD zAAz; RealD zAAz;
ComplexD rq; ComplexD rq;
@ -146,7 +147,7 @@ public:
////////////////////////////////// //////////////////////////////////
MatTimer.Start(); MatTimer.Start();
Linop.Op(psi,Az); Linop.Op(psi,Az);
zAz = innerProduct(Az,psi); // zAz = innerProduct(Az,psi);
zAAz= norm2(Az); zAAz= norm2(Az);
MatTimer.Stop(); MatTimer.Stop();
@ -170,7 +171,7 @@ public:
LinalgTimer.Start(); LinalgTimer.Start();
zAz = innerProduct(Az,psi); // zAz = innerProduct(Az,psi);
zAAz= norm2(Az); zAAz= norm2(Az);
//p[0],q[0],qq[0] //p[0],q[0],qq[0]
@ -212,7 +213,7 @@ public:
MatTimer.Start(); MatTimer.Start();
Linop.Op(z,Az); Linop.Op(z,Az);
MatTimer.Stop(); MatTimer.Stop();
zAz = innerProduct(Az,psi); // zAz = innerProduct(Az,psi);
zAAz= norm2(Az); zAAz= norm2(Az);
LinalgTimer.Start(); LinalgTimer.Start();

View File

@ -132,6 +132,31 @@ namespace Grid {
(*this)(_Matrix,in,out,guess); (*this)(_Matrix,in,out,guess);
} }
void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
Field tmp(grid);
int nblock = in.size();
for(int b=0;b<nblock;b++){
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
}
}
// James can write his own deflated guesser
// with optimised code for the inner products
// RedBlackSolveSplitGrid();
// RedBlackSolve(_Matrix,src_o,sol_o);
void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out)
{
GridBase *grid = _Matrix.RedBlackGrid();
Field tmp(grid);
int nblock = in.size();
for(int b=0;b<nblock;b++) {
pickCheckerboard(Even,tmp,in[b]);
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
}
}
template<class Guesser> template<class Guesser>
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess) void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
{ {
@ -150,24 +175,29 @@ namespace Grid {
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Prepare RedBlack source // Prepare RedBlack source
//////////////////////////////////////////////// ////////////////////////////////////////////////
for(int b=0;b<nblock;b++){ RedBlackSource(_Matrix,in,src_o);
RedBlackSource(_Matrix,in[b],tmp,src_o[b]); // for(int b=0;b<nblock;b++){
} // RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
// }
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Make the guesses // Make the guesses
//////////////////////////////////////////////// ////////////////////////////////////////////////
if ( subGuess ) guess_save.resize(nblock,grid); if ( subGuess ) guess_save.resize(nblock,grid);
for(int b=0;b<nblock;b++){
if(useSolnAsInitGuess) { if(useSolnAsInitGuess) {
for(int b=0;b<nblock;b++){
pickCheckerboard(Odd, sol_o[b], out[b]); pickCheckerboard(Odd, sol_o[b], out[b]);
} else {
guess(src_o[b],sol_o[b]);
} }
} else {
guess(src_o, sol_o);
}
if ( subGuess ) { if ( subGuess ) {
guess_save[b] = sol_o[b]; for(int b=0;b<nblock;b++){
} guess_save[b] = sol_o[b];
}
} }
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
// Call the block solver // Call the block solver

View File

@ -9,14 +9,30 @@ NAMESPACE_BEGIN(Grid);
#define AccSmall (3) #define AccSmall (3)
#define Shared (4) #define Shared (4)
#define SharedSmall (5) #define SharedSmall (5)
#undef GRID_MM_VERBOSE
uint64_t total_shared; uint64_t total_shared;
uint64_t total_device; uint64_t total_device;
uint64_t total_host;; uint64_t total_host;;
void MemoryManager::PrintBytes(void) void MemoryManager::PrintBytes(void)
{ {
std::cout << " MemoryManager : "<<total_shared<<" shared bytes "<<std::endl; std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl; std::cout << " MemoryManager : PrintBytes "<<std::endl;
std::cout << " MemoryManager : "<<total_host <<" cpu bytes "<<std::endl; std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
std::cout << " MemoryManager : "<<(total_shared>>20)<<" shared Mbytes "<<std::endl;
std::cout << " MemoryManager : "<<(total_device>>20)<<" accelerator Mbytes "<<std::endl;
std::cout << " MemoryManager : "<<(total_host>>20) <<" cpu Mbytes "<<std::endl;
uint64_t cacheBytes;
cacheBytes = CacheBytes[Cpu];
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" cpu cache Mbytes "<<std::endl;
cacheBytes = CacheBytes[Acc];
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" acc cache Mbytes "<<std::endl;
cacheBytes = CacheBytes[Shared];
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" shared cache Mbytes "<<std::endl;
#ifdef GRID_CUDA
cuda_mem();
#endif
} }
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
@ -24,86 +40,114 @@ void MemoryManager::PrintBytes(void)
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
int MemoryManager::Victim[MemoryManager::NallocType]; int MemoryManager::Victim[MemoryManager::NallocType];
int MemoryManager::Ncache[MemoryManager::NallocType] = { 8, 32, 8, 32, 8, 32 }; int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 };
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// Actual allocation and deallocation utils // Actual allocation and deallocation utils
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
void *MemoryManager::AcceleratorAllocate(size_t bytes) void *MemoryManager::AcceleratorAllocate(size_t bytes)
{ {
total_device+=bytes;
void *ptr = (void *) Lookup(bytes,Acc); void *ptr = (void *) Lookup(bytes,Acc);
if ( ptr == (void *) NULL ) { if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocDevice(bytes); ptr = (void *) acceleratorAllocDevice(bytes);
total_device+=bytes;
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"AcceleratorAllocate "<<std::endl;
PrintBytes();
#endif
return ptr; return ptr;
} }
void MemoryManager::AcceleratorFree (void *ptr,size_t bytes) void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
{ {
total_device-=bytes;
void *__freeme = Insert(ptr,bytes,Acc); void *__freeme = Insert(ptr,bytes,Acc);
if ( __freeme ) { if ( __freeme ) {
acceleratorFreeDevice(__freeme); acceleratorFreeDevice(__freeme);
total_device-=bytes;
// PrintBytes();
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"AcceleratorFree "<<std::endl;
PrintBytes();
#endif
} }
void *MemoryManager::SharedAllocate(size_t bytes) void *MemoryManager::SharedAllocate(size_t bytes)
{ {
total_shared+=bytes;
void *ptr = (void *) Lookup(bytes,Shared); void *ptr = (void *) Lookup(bytes,Shared);
if ( ptr == (void *) NULL ) { if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocShared(bytes); ptr = (void *) acceleratorAllocShared(bytes);
total_shared+=bytes;
// std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
// PrintBytes();
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"SharedAllocate "<<std::endl;
PrintBytes();
#endif
return ptr; return ptr;
} }
void MemoryManager::SharedFree (void *ptr,size_t bytes) void MemoryManager::SharedFree (void *ptr,size_t bytes)
{ {
total_shared-=bytes;
void *__freeme = Insert(ptr,bytes,Shared); void *__freeme = Insert(ptr,bytes,Shared);
if ( __freeme ) { if ( __freeme ) {
acceleratorFreeShared(__freeme); acceleratorFreeShared(__freeme);
total_shared-=bytes;
// PrintBytes();
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"SharedFree "<<std::endl;
PrintBytes();
#endif
} }
#ifdef GRID_UVM #ifdef GRID_UVM
void *MemoryManager::CpuAllocate(size_t bytes) void *MemoryManager::CpuAllocate(size_t bytes)
{ {
total_host+=bytes;
void *ptr = (void *) Lookup(bytes,Cpu); void *ptr = (void *) Lookup(bytes,Cpu);
if ( ptr == (void *) NULL ) { if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocShared(bytes); ptr = (void *) acceleratorAllocShared(bytes);
total_host+=bytes;
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"CpuAllocate "<<std::endl;
PrintBytes();
#endif
return ptr; return ptr;
} }
void MemoryManager::CpuFree (void *_ptr,size_t bytes) void MemoryManager::CpuFree (void *_ptr,size_t bytes)
{ {
total_host-=bytes;
NotifyDeletion(_ptr); NotifyDeletion(_ptr);
void *__freeme = Insert(_ptr,bytes,Cpu); void *__freeme = Insert(_ptr,bytes,Cpu);
if ( __freeme ) { if ( __freeme ) {
acceleratorFreeShared(__freeme); acceleratorFreeShared(__freeme);
total_host-=bytes;
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"CpuFree "<<std::endl;
PrintBytes();
#endif
} }
#else #else
void *MemoryManager::CpuAllocate(size_t bytes) void *MemoryManager::CpuAllocate(size_t bytes)
{ {
total_host+=bytes;
void *ptr = (void *) Lookup(bytes,Cpu); void *ptr = (void *) Lookup(bytes,Cpu);
if ( ptr == (void *) NULL ) { if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocCpu(bytes); ptr = (void *) acceleratorAllocCpu(bytes);
total_host+=bytes;
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"CpuAllocate "<<std::endl;
PrintBytes();
#endif
return ptr; return ptr;
} }
void MemoryManager::CpuFree (void *_ptr,size_t bytes) void MemoryManager::CpuFree (void *_ptr,size_t bytes)
{ {
total_host-=bytes;
NotifyDeletion(_ptr); NotifyDeletion(_ptr);
void *__freeme = Insert(_ptr,bytes,Cpu); void *__freeme = Insert(_ptr,bytes,Cpu);
if ( __freeme ) { if ( __freeme ) {
acceleratorFreeCpu(__freeme); acceleratorFreeCpu(__freeme);
total_host-=bytes;
} }
#ifdef GRID_MM_VERBOSE
std::cout <<"CpuFree "<<std::endl;
PrintBytes();
#endif
} }
#endif #endif
@ -115,7 +159,6 @@ void MemoryManager::Init(void)
char * str; char * str;
int Nc; int Nc;
int NcS;
str= getenv("GRID_ALLOC_NCACHE_LARGE"); str= getenv("GRID_ALLOC_NCACHE_LARGE");
if ( str ) { if ( str ) {
@ -181,13 +224,13 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,int type)
#ifdef ALLOCATION_CACHE #ifdef ALLOCATION_CACHE
bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); bool small = (bytes < GRID_ALLOC_SMALL_LIMIT);
int cache = type + small; int cache = type + small;
return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache]); return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache],CacheBytes[cache]);
#else #else
return ptr; return ptr;
#endif #endif
} }
void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes)
{ {
assert(ncache>0); assert(ncache>0);
#ifdef GRID_OMP #ifdef GRID_OMP
@ -211,6 +254,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
if ( entries[v].valid ) { if ( entries[v].valid ) {
ret = entries[v].address; ret = entries[v].address;
cacheBytes -= entries[v].bytes;
entries[v].valid = 0; entries[v].valid = 0;
entries[v].address = NULL; entries[v].address = NULL;
entries[v].bytes = 0; entries[v].bytes = 0;
@ -219,6 +263,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
entries[v].address=ptr; entries[v].address=ptr;
entries[v].bytes =bytes; entries[v].bytes =bytes;
entries[v].valid =1; entries[v].valid =1;
cacheBytes += bytes;
return ret; return ret;
} }
@ -228,13 +273,13 @@ void *MemoryManager::Lookup(size_t bytes,int type)
#ifdef ALLOCATION_CACHE #ifdef ALLOCATION_CACHE
bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); bool small = (bytes < GRID_ALLOC_SMALL_LIMIT);
int cache = type+small; int cache = type+small;
return Lookup(bytes,Entries[cache],Ncache[cache]); return Lookup(bytes,Entries[cache],Ncache[cache],CacheBytes[cache]);
#else #else
return NULL; return NULL;
#endif #endif
} }
void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & cacheBytes)
{ {
assert(ncache>0); assert(ncache>0);
#ifdef GRID_OMP #ifdef GRID_OMP
@ -243,6 +288,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach
for(int e=0;e<ncache;e++){ for(int e=0;e<ncache;e++){
if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
entries[e].valid = 0; entries[e].valid = 0;
cacheBytes -= entries[e].bytes;
return entries[e].address; return entries[e].address;
} }
} }

View File

@ -82,14 +82,15 @@ private:
static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; static AllocationCacheEntry Entries[NallocType][NallocCacheMax];
static int Victim[NallocType]; static int Victim[NallocType];
static int Ncache[NallocType]; static int Ncache[NallocType];
static uint64_t CacheBytes[NallocType];
///////////////////////////////////////////////// /////////////////////////////////////////////////
// Free pool // Free pool
///////////////////////////////////////////////// /////////////////////////////////////////////////
static void *Insert(void *ptr,size_t bytes,int type) ; static void *Insert(void *ptr,size_t bytes,int type) ;
static void *Lookup(size_t bytes,int type) ; static void *Lookup(size_t bytes,int type) ;
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ;
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ;
static void PrintBytes(void); static void PrintBytes(void);
public: public:

View File

@ -3,7 +3,7 @@
#warning "Using explicit device memory copies" #warning "Using explicit device memory copies"
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
//define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout); //#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
#define dprintf(...) #define dprintf(...)
@ -429,6 +429,7 @@ void MemoryManager::NotifyDeletion(void *_ptr)
} }
void MemoryManager::Print(void) void MemoryManager::Print(void)
{ {
PrintBytes();
std::cout << GridLogDebug << "--------------------------------------------" << std::endl; std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogDebug << "Memory Manager " << std::endl; std::cout << GridLogDebug << "Memory Manager " << std::endl;
std::cout << GridLogDebug << "--------------------------------------------" << std::endl; std::cout << GridLogDebug << "--------------------------------------------" << std::endl;

View File

@ -33,6 +33,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
bool Stencil_force_mpi = true;
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout // Info that is setup once and indept of cartesian layout
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////

View File

@ -35,6 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
extern bool Stencil_force_mpi ;
class CartesianCommunicator : public SharedMemory { class CartesianCommunicator : public SharedMemory {
public: public:

View File

@ -370,7 +370,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
double off_node_bytes=0.0; double off_node_bytes=0.0;
int tag; int tag;
if ( gfrom ==MPI_UNDEFINED) { if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+from*32; tag= dir+from*32;
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
assert(ierr==0); assert(ierr==0);
@ -378,12 +378,17 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
off_node_bytes+=bytes; off_node_bytes+=bytes;
} }
if ( gdest == MPI_UNDEFINED ) { if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+_processor*32; tag= dir+_processor*32;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
assert(ierr==0); assert(ierr==0);
list.push_back(xrq); list.push_back(xrq);
off_node_bytes+=bytes; off_node_bytes+=bytes;
} else {
// TODO : make a OMP loop on CPU, call threaded bcopy
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL);
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
} }
if ( CommunicatorPolicy == CommunicatorPolicySequential ) { if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
@ -399,6 +404,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
if (nreq==0) return; if (nreq==0) return;
std::vector<MPI_Status> status(nreq); std::vector<MPI_Status> status(nreq);
acceleratorCopySynchronise();
int ierr = MPI_Waitall(nreq,&list[0],&status[0]); int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0); assert(ierr==0);
list.resize(0); list.resize(0);

View File

@ -73,6 +73,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
WorldNodes = WorldSize/WorldShmSize; WorldNodes = WorldSize/WorldShmSize;
assert( (WorldNodes * WorldShmSize) == WorldSize ); assert( (WorldNodes * WorldShmSize) == WorldSize );
// FIXME: Check all WorldShmSize are the same ? // FIXME: Check all WorldShmSize are the same ?
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -451,7 +452,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
#if defined(GRID_CUDA) ||defined(GRID_HIP) || defined(GRID_SYCL) #if defined(GRID_CUDA) ||defined(GRID_HIP) || defined(GRID_SYCL)
#if defined(GRID_SYCL) //if defined(GRID_SYCL)
#if 0
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
void * ShmCommBuf ; void * ShmCommBuf ;
@ -488,7 +490,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
} }
#endif #endif
#if defined(GRID_CUDA) ||defined(GRID_HIP) #if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
void * ShmCommBuf ; void * ShmCommBuf ;
@ -512,18 +514,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Each MPI rank should allocate our own buffer // Each MPI rank should allocate our own buffer
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBuf = acceleratorAllocDevice(bytes); ShmCommBuf = acceleratorAllocDevice(bytes);
if (ShmCommBuf == (void *)NULL ) { if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
// if ( WorldRank == 0 ){ if ( WorldRank == 0 ){
if ( 1 ){
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
} }
SharedMemoryZero(ShmCommBuf,bytes); SharedMemoryZero(ShmCommBuf,bytes);
std::cout<< "Setting up IPC"<<std::endl;
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Loop over ranks/gpu's on our node // Loop over ranks/gpu's on our node
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -533,6 +533,29 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////// //////////////////////////////////////////////////
// If it is me, pass around the IPC access key // If it is me, pass around the IPC access key
////////////////////////////////////////////////// //////////////////////////////////////////////////
void * thisBuf = ShmCommBuf;
if(!Stencil_force_mpi) {
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
typedef struct { int fd; pid_t pid ; } clone_mem_t;
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
ze_ipc_mem_handle_t ihandle;
clone_mem_t handle;
if ( r==WorldShmRank ) {
auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
}
memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
handle.pid = getpid();
}
#endif
#ifdef GRID_CUDA #ifdef GRID_CUDA
cudaIpcMemHandle_t handle; cudaIpcMemHandle_t handle;
if ( r==WorldShmRank ) { if ( r==WorldShmRank ) {
@ -553,6 +576,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
} }
} }
#endif #endif
////////////////////////////////////////////////// //////////////////////////////////////////////////
// Share this IPC handle across the Shm Comm // Share this IPC handle across the Shm Comm
////////////////////////////////////////////////// //////////////////////////////////////////////////
@ -568,7 +592,35 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// If I am not the source, overwrite thisBuf with remote buffer // If I am not the source, overwrite thisBuf with remote buffer
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
void * thisBuf = ShmCommBuf;
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
if ( r!=WorldShmRank ) {
thisBuf = nullptr;
std::cout<<"mapping seeking remote pid/fd "
<<handle.pid<<"/"
<<handle.fd<<std::endl;
int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
// int myfd = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
int myfd = syscall(438,pidfd,handle.fd,0);
std::cout<<"Using IpcHandle myfd "<<myfd<<"\n";
memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle pointer is "<<std::hex<<thisBuf<<std::dec<<std::endl;
}
assert(thisBuf!=nullptr);
}
#endif
#ifdef GRID_CUDA #ifdef GRID_CUDA
if ( r!=WorldShmRank ) { if ( r!=WorldShmRank ) {
auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess);
@ -590,6 +642,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Save a copy of the device buffers // Save a copy of the device buffers
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
}
WorldShmCommBufs[r] = thisBuf; WorldShmCommBufs[r] = thisBuf;
#else #else
WorldShmCommBufs[r] = ShmCommBuf; WorldShmCommBufs[r] = ShmCommBuf;
@ -844,7 +897,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
} }
#endif #endif
SharedMemoryTest(); //SharedMemoryTest();
} }
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// On node barrier // On node barrier

View File

@ -225,7 +225,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
autoView( x_v , x, AcceleratorRead); autoView( x_v , x, AcceleratorRead);
autoView( y_v , y, AcceleratorRead); autoView( y_v , y, AcceleratorRead);
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
auto tmp = a*x_v(ss)+y_v(ss); auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]);
coalescedWrite(ret_v[ss],tmp); coalescedWrite(ret_v[ss],tmp);
}); });
} }

View File

@ -125,7 +125,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
for(int k=k0; k<k1; ++k){ for(int k=k0; k<k1; ++k){
auto tmp = coalescedRead(Bp[ss*nrot+j]); auto tmp = coalescedRead(Bp[ss*nrot+j]);
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss])); coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss]));
} }
}); });
@ -134,7 +134,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
int jj =j0+j; int jj =j0+j;
int ss =sj/nrot; int ss =sj/nrot;
int sss=ss+s; int sss=ss+s;
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j]));
}); });
} }
#endif #endif

View File

@ -361,6 +361,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
// But easily avoided by using double precision fields // But easily avoided by using double precision fields
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_object::scalar_type scalar_type;
GridBase *grid = Data.Grid(); GridBase *grid = Data.Grid();
assert(grid!=NULL); assert(grid!=NULL);
@ -419,20 +420,19 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
} }
// sum over nodes. // sum over nodes.
sobj gsum;
for(int t=0;t<fd;t++){ for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane int pt = t/ld; // processor plane
int lt = t%ld; int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) { if ( pt == grid->_processor_coor[orthogdim] ) {
gsum=lsSum[lt]; result[t]=lsSum[lt];
} else { } else {
gsum=Zero(); result[t]=Zero();
} }
grid->GlobalSum(gsum);
result[t]=gsum;
} }
scalar_type * ptr = (scalar_type *) &result[0];
int words = fd*sizeof(sobj)/sizeof(scalar_type);
grid->GlobalSumVector(ptr, words);
} }
template<class vobj> template<class vobj>

View File

@ -42,7 +42,6 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl; std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl;
std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl; std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl;
std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl; std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl;
std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << warpSize << std::endl;
std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl; std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl;
if (warpSize != WARP_SIZE) { if (warpSize != WARP_SIZE) {
@ -52,6 +51,10 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
// let the number of threads in a block be a multiple of 2, starting from warpSize // let the number of threads in a block be a multiple of 2, starting from warpSize
threads = warpSize; threads = warpSize;
if ( threads*sizeofsobj > sharedMemPerBlock ) {
std::cout << GridLogError << "The object is too large for the shared memory." << std::endl;
exit(EXIT_FAILURE);
}
while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2; while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2;
// keep all the streaming multiprocessors busy // keep all the streaming multiprocessors busy
blocks = nextPow2(multiProcessorCount); blocks = nextPow2(multiProcessorCount);

View File

@ -85,6 +85,76 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
}); });
} }
template<class vobj> inline void acceleratorPickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full, int checker_dim_half=0)
{
half.Checkerboard() = cb;
autoView(half_v, half, AcceleratorWrite);
autoView(full_v, full, AcceleratorRead);
Coordinate rdim_full = full.Grid()->_rdimensions;
Coordinate rdim_half = half.Grid()->_rdimensions;
unsigned long ndim_half = half.Grid()->_ndimension;
Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask;
Coordinate ostride_half = half.Grid()->_ostride;
accelerator_for(ss, full.Grid()->oSites(),full.Grid()->Nsimd(),{
Coordinate coor;
int cbos;
int linear=0;
Lexicographic::CoorFromIndex(coor,ss,rdim_full);
assert(coor.size()==ndim_half);
for(int d=0;d<ndim_half;d++){
if(checker_dim_mask_half[d]) linear += coor[d];
}
cbos = (linear&0x1);
if (cbos==cb) {
int ssh=0;
for(int d=0;d<ndim_half;d++) {
if (d == checker_dim_half) ssh += ostride_half[d] * ((coor[d] / 2) % rdim_half[d]);
else ssh += ostride_half[d] * (coor[d] % rdim_half[d]);
}
coalescedWrite(half_v[ssh],full_v(ss));
}
});
}
template<class vobj> inline void acceleratorSetCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half, int checker_dim_half=0)
{
int cb = half.Checkerboard();
autoView(half_v , half, AcceleratorRead);
autoView(full_v , full, AcceleratorWrite);
Coordinate rdim_full = full.Grid()->_rdimensions;
Coordinate rdim_half = half.Grid()->_rdimensions;
unsigned long ndim_half = half.Grid()->_ndimension;
Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask;
Coordinate ostride_half = half.Grid()->_ostride;
accelerator_for(ss,full.Grid()->oSites(),full.Grid()->Nsimd(),{
Coordinate coor;
int cbos;
int linear=0;
Lexicographic::CoorFromIndex(coor,ss,rdim_full);
assert(coor.size()==ndim_half);
for(int d=0;d<ndim_half;d++){
if(checker_dim_mask_half[d]) linear += coor[d];
}
cbos = (linear&0x1);
if (cbos==cb) {
int ssh=0;
for(int d=0;d<ndim_half;d++){
if (d == checker_dim_half) ssh += ostride_half[d] * ((coor[d] / 2) % rdim_half[d]);
else ssh += ostride_half[d] * (coor[d] % rdim_half[d]);
}
coalescedWrite(full_v[ss],half_v(ssh));
}
});
}
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
// Flexible Type Conversion for internal promotion to double as well as graceful // Flexible Type Conversion for internal promotion to double as well as graceful
// treatment of scalar-compatible types // treatment of scalar-compatible types
@ -364,16 +434,22 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
autoView( coarseData_ , coarseData, AcceleratorWrite); autoView( coarseData_ , coarseData, AcceleratorWrite);
autoView( fineData_ , fineData, AcceleratorRead); autoView( fineData_ , fineData, AcceleratorRead);
auto coarseData_p = &coarseData_[0];
auto fineData_p = &fineData_[0];
Coordinate fine_rdimensions = fine->_rdimensions; Coordinate fine_rdimensions = fine->_rdimensions;
Coordinate coarse_rdimensions = coarse->_rdimensions; Coordinate coarse_rdimensions = coarse->_rdimensions;
vobj zz = Zero();
accelerator_for(sc,coarse->oSites(),1,{ accelerator_for(sc,coarse->oSites(),1,{
// One thread per sub block // One thread per sub block
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
coarseData_[sc]=Zero();
vobj cd = zz;
for(int sb=0;sb<blockVol;sb++){ for(int sb=0;sb<blockVol;sb++){
int sf; int sf;
@ -383,9 +459,11 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
coarseData_[sc]=coarseData_[sc]+fineData_[sf]; cd=cd+fineData_p[sf];
} }
coarseData_p[sc] = cd;
}); });
return; return;
} }

View File

@ -576,6 +576,7 @@ class ScidacReader : public GridLimeReader {
std::string rec_name(ILDG_BINARY_DATA); std::string rec_name(ILDG_BINARY_DATA);
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) { if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
skipPastObjectRecord(std::string(GRID_FIELD_NORM));
skipPastObjectRecord(std::string(SCIDAC_CHECKSUM)); skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
return; return;
} }

View File

@ -115,9 +115,9 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF; typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD; typedef WilsonFermion<WilsonImplD> WilsonFermionD;
typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; //typedef WilsonFermion<WilsonImplRL> WilsonFermionRL;
typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; //typedef WilsonFermion<WilsonImplFH> WilsonFermionFH;
typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; //typedef WilsonFermion<WilsonImplDF> WilsonFermionDF;
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
@ -158,41 +158,41 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; //typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; //typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; //typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR;
typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF;
typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD;
typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; //typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL;
typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; //typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH;
typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; //typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF;
typedef MobiusFermion<WilsonImplR> MobiusFermionR; typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF; typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD; typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; //typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; //typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; //typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR;
typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF;
typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD;
typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; //typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL;
typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; //typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH;
typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; //typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; //typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; //typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; //typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
// Ls vectorised // Ls vectorised
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
@ -235,49 +235,49 @@ typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF; typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD; typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL; //typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL;
typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH; //typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH;
typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF; //typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF;
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; //typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; //typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; //typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR;
typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF;
typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD;
typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; //typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL;
typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; //typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH;
typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; //typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF;
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; //typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL;
typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; //typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH;
typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; //typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF;
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; //typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; //typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; //typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR;
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF;
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD;
typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; //typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL;
typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; //typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH;
typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; //typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF;
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;

View File

@ -327,8 +327,8 @@ typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> Gparit
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec //typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float //typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double //typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -68,11 +68,12 @@ public:
/*****************************************************/ /*****************************************************/
/* Compress includes precision change if mpi data is not same */ /* Compress includes precision change if mpi data is not same */
/*****************************************************/ /*****************************************************/
template<class _SiteHalfSpinor, class _SiteSpinor> accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { typedef decltype(coalescedRead(buf)) sobj;
_SiteHalfSpinor tmp; sobj sp;
projector::Proj(tmp,in,mu,dag); auto sin = coalescedRead(in);
vstream(buf[o],tmp); projector::Proj(sp,sin,mu,dag);
coalescedWrite(buf,sp);
} }
/*****************************************************/ /*****************************************************/
@ -82,13 +83,18 @@ public:
const SiteHalfSpinor * __restrict__ vp0, const SiteHalfSpinor * __restrict__ vp0,
const SiteHalfSpinor * __restrict__ vp1, const SiteHalfSpinor * __restrict__ vp1,
Integer type,Integer o) const { Integer type,Integer o) const {
#ifdef GRID_SIMT
exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
#else
SiteHalfSpinor tmp1; SiteHalfSpinor tmp1;
SiteHalfSpinor tmp2; SiteHalfSpinor tmp2;
exchange(tmp1,tmp2,vp0[o],vp1[o],type); exchange(tmp1,tmp2,vp0[o],vp1[o],type);
vstream(mp[2*o ],tmp1); vstream(mp[2*o ],tmp1);
vstream(mp[2*o+1],tmp2); vstream(mp[2*o+1],tmp2);
#endif
} }
/*****************************************************/ /*****************************************************/
/* Have a decompression step if mpi data is not same */ /* Have a decompression step if mpi data is not same */
/*****************************************************/ /*****************************************************/
@ -105,6 +111,28 @@ public:
const SiteSpinor * __restrict__ in, const SiteSpinor * __restrict__ in,
Integer j,Integer k, Integer m,Integer type) const Integer j,Integer k, Integer m,Integer type) const
{ {
#ifdef GRID_SIMT
typedef SiteSpinor vobj;
typedef SiteHalfSpinor hvobj;
typedef decltype(coalescedRead(*in)) sobj;
typedef decltype(coalescedRead(*out0)) hsobj;
unsigned int Nsimd = vobj::Nsimd();
unsigned int mask = Nsimd >> (type + 1);
int lane = acceleratorSIMTlane(Nsimd);
int j0 = lane &(~mask); // inner coor zero
int j1 = lane |(mask) ; // inner coor one
const vobj *vp0 = &in[k];
const vobj *vp1 = &in[m];
const vobj *vp = (lane&mask) ? vp1:vp0;
auto sa = coalescedRead(*vp,j0);
auto sb = coalescedRead(*vp,j1);
hsobj psa, psb;
projector::Proj(psa,sa,mu,dag);
projector::Proj(psb,sb,mu,dag);
coalescedWrite(out0[j],psa);
coalescedWrite(out1[j],psb);
#else
SiteHalfSpinor temp1, temp2; SiteHalfSpinor temp1, temp2;
SiteHalfSpinor temp3, temp4; SiteHalfSpinor temp3, temp4;
projector::Proj(temp1,in[k],mu,dag); projector::Proj(temp1,in[k],mu,dag);
@ -112,6 +140,7 @@ public:
exchange(temp3,temp4,temp1,temp2,type); exchange(temp3,temp4,temp1,temp2,type);
vstream(out0[j],temp3); vstream(out0[j],temp3);
vstream(out1[j],temp4); vstream(out1[j],temp4);
#endif
} }
/*****************************************************/ /*****************************************************/
@ -121,6 +150,7 @@ public:
}; };
#if 0
template<class _HCspinor,class _Hspinor,class _Spinor, class projector> template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
@ -149,13 +179,23 @@ public:
/*****************************************************/ /*****************************************************/
/* Compress includes precision change if mpi data is not same */ /* Compress includes precision change if mpi data is not same */
/*****************************************************/ /*****************************************************/
template<class _SiteHalfSpinor, class _SiteSpinor> accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { SiteHalfSpinor hsp;
_SiteHalfSpinor hsp;
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
projector::Proj(hsp,in,mu,dag); projector::Proj(hsp,in,mu,dag);
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
} }
accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
#ifdef GRID_SIMT
typedef decltype(coalescedRead(buf)) sobj;
sobj sp;
auto sin = coalescedRead(in);
projector::Proj(sp,sin,mu,dag);
coalescedWrite(buf,sp);
#else
projector::Proj(buf,in,mu,dag);
#endif
}
/*****************************************************/ /*****************************************************/
/* Exchange includes precision change if mpi data is not same */ /* Exchange includes precision change if mpi data is not same */
@ -203,6 +243,7 @@ public:
accelerator_inline bool DecompressionStep(void) const { return true; } accelerator_inline bool DecompressionStep(void) const { return true; }
}; };
#endif
#define DECLARE_PROJ(Projector,Compressor,spProj) \ #define DECLARE_PROJ(Projector,Compressor,spProj) \
class Projector { \ class Projector { \
@ -253,33 +294,8 @@ public:
typedef typename Base::View_type View_type; typedef typename Base::View_type View_type;
typedef typename Base::StencilVector StencilVector; typedef typename Base::StencilVector StencilVector;
double timer0; void ZeroCountersi(void) { }
double timer1; void Reporti(int calls) { }
double timer2;
double timer3;
double timer4;
double timer5;
double timer6;
uint64_t callsi;
void ZeroCountersi(void)
{
timer0=0;
timer1=0;
timer2=0;
timer3=0;
timer4=0;
timer5=0;
timer6=0;
callsi=0;
}
void Reporti(int calls)
{
if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl;
if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate) " <<timer1/calls <<std::endl;
if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge ) " <<timer2/calls <<std::endl;
if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl;
if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl;
}
std::vector<int> surface_list; std::vector<int> surface_list;
@ -321,26 +337,18 @@ public:
{ {
std::vector<std::vector<CommsRequest_t> > reqs; std::vector<std::vector<CommsRequest_t> > reqs;
this->HaloExchangeOptGather(source,compress); this->HaloExchangeOptGather(source,compress);
double t1=usecond();
// Asynchronous MPI calls multidirectional, Isend etc... // Asynchronous MPI calls multidirectional, Isend etc...
// Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
this->Communicate(); this->Communicate();
double t2=usecond(); timer1 += t2-t1;
this->CommsMerge(compress); this->CommsMerge(compress);
double t3=usecond(); timer2 += t3-t2;
this->CommsMergeSHM(compress); this->CommsMergeSHM(compress);
double t4=usecond(); timer3 += t4-t3;
} }
template <class compressor> template <class compressor>
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress) void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
{ {
this->Prepare(); this->Prepare();
double t0=usecond();
this->HaloGatherOpt(source,compress); this->HaloGatherOpt(source,compress);
double t1=usecond();
timer0 += t1-t0;
callsi++;
} }
template <class compressor> template <class compressor>
@ -352,12 +360,9 @@ public:
typedef typename compressor::SiteHalfSpinor SiteHalfSpinor; typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
this->mpi3synctime_g-=usecond();
this->_grid->StencilBarrier(); this->_grid->StencilBarrier();
this->mpi3synctime_g+=usecond();
assert(source.Grid()==this->_grid); assert(source.Grid()==this->_grid);
this->halogtime-=usecond();
this->u_comm_offset=0; this->u_comm_offset=0;
@ -393,7 +398,6 @@ public:
} }
this->face_table_computed=1; this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size); assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
accelerator_barrier(); accelerator_barrier();
} }

View File

@ -243,17 +243,17 @@ typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec //typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec //typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float

View File

@ -902,8 +902,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sp = (s+1)%Ls; int sp = (s+1)%Ls;
int sr = Ls-1-s; // int sr = Ls-1-s;
int srp= (sr+1)%Ls; // int srp= (sr+1)%Ls;
// Mobius parameters // Mobius parameters
auto b=this->bs[s]; auto b=this->bs[s];

View File

@ -73,17 +73,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -102,17 +102,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -131,17 +131,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
@ -165,17 +165,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -194,17 +194,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -223,17 +223,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
@ -280,17 +280,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -309,17 +309,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -338,17 +338,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
@ -371,17 +371,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -400,17 +400,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -429,17 +429,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>

View File

@ -74,15 +74,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -97,15 +97,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -121,15 +121,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single // XYZT vectorised, dag Kernel, single
@ -148,15 +148,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -171,15 +171,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -194,15 +194,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM #undef MAYBEPERM
#undef MULT_2SPIN #undef MULT_2SPIN
@ -228,14 +228,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeF
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -249,14 +249,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -273,15 +273,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single // Ls vectorised, dag Kernel, single
@ -299,14 +299,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -320,14 +320,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -341,14 +341,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D #endif // VEC 5D
@ -392,14 +392,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -413,14 +413,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -434,14 +434,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single // XYZT vectorised, dag Kernel, single
@ -459,14 +459,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -480,14 +480,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -501,14 +501,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM #undef MAYBEPERM
#undef MULT_2SPIN #undef MULT_2SPIN
@ -533,14 +533,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeF
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -554,14 +554,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -577,14 +577,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single // Ls vectorised, dag Kernel, single
@ -602,14 +602,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -623,14 +623,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -645,14 +645,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D #endif // VEC 5D

View File

@ -77,23 +77,23 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define REGISTER #define REGISTER
#ifdef GRID_SIMT #ifdef GRID_SIMT
#define LOAD_CHIMU(ptype) \ #define LOAD_CHIMU(Ptype) \
{const SiteSpinor & ref (in[offset]); \ {const SiteSpinor & ref (in[offset]); \
Chimu_00=coalescedReadPermute<ptype>(ref()(0)(0),perm,lane); \ Chimu_00=coalescedReadPermute<Ptype>(ref()(0)(0),perm,lane); \
Chimu_01=coalescedReadPermute<ptype>(ref()(0)(1),perm,lane); \ Chimu_01=coalescedReadPermute<Ptype>(ref()(0)(1),perm,lane); \
Chimu_02=coalescedReadPermute<ptype>(ref()(0)(2),perm,lane); \ Chimu_02=coalescedReadPermute<Ptype>(ref()(0)(2),perm,lane); \
Chimu_10=coalescedReadPermute<ptype>(ref()(1)(0),perm,lane); \ Chimu_10=coalescedReadPermute<Ptype>(ref()(1)(0),perm,lane); \
Chimu_11=coalescedReadPermute<ptype>(ref()(1)(1),perm,lane); \ Chimu_11=coalescedReadPermute<Ptype>(ref()(1)(1),perm,lane); \
Chimu_12=coalescedReadPermute<ptype>(ref()(1)(2),perm,lane); \ Chimu_12=coalescedReadPermute<Ptype>(ref()(1)(2),perm,lane); \
Chimu_20=coalescedReadPermute<ptype>(ref()(2)(0),perm,lane); \ Chimu_20=coalescedReadPermute<Ptype>(ref()(2)(0),perm,lane); \
Chimu_21=coalescedReadPermute<ptype>(ref()(2)(1),perm,lane); \ Chimu_21=coalescedReadPermute<Ptype>(ref()(2)(1),perm,lane); \
Chimu_22=coalescedReadPermute<ptype>(ref()(2)(2),perm,lane); \ Chimu_22=coalescedReadPermute<Ptype>(ref()(2)(2),perm,lane); \
Chimu_30=coalescedReadPermute<ptype>(ref()(3)(0),perm,lane); \ Chimu_30=coalescedReadPermute<Ptype>(ref()(3)(0),perm,lane); \
Chimu_31=coalescedReadPermute<ptype>(ref()(3)(1),perm,lane); \ Chimu_31=coalescedReadPermute<Ptype>(ref()(3)(1),perm,lane); \
Chimu_32=coalescedReadPermute<ptype>(ref()(3)(2),perm,lane); } Chimu_32=coalescedReadPermute<Ptype>(ref()(3)(2),perm,lane); }
#define PERMUTE_DIR(dir) ; #define PERMUTE_DIR(dir) ;
#else #else
#define LOAD_CHIMU(ptype) \ #define LOAD_CHIMU(Ptype) \
{const SiteSpinor & ref (in[offset]); \ {const SiteSpinor & ref (in[offset]); \
Chimu_00=ref()(0)(0);\ Chimu_00=ref()(0)(0);\
Chimu_01=ref()(0)(1);\ Chimu_01=ref()(0)(1);\
@ -109,12 +109,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Chimu_32=ref()(3)(2);} Chimu_32=ref()(3)(2);}
#define PERMUTE_DIR(dir) \ #define PERMUTE_DIR(dir) \
permute##dir(Chi_00,Chi_00); \ permute##dir(Chi_00,Chi_00); \
permute##dir(Chi_01,Chi_01);\ permute##dir(Chi_01,Chi_01); \
permute##dir(Chi_02,Chi_02);\ permute##dir(Chi_02,Chi_02); \
permute##dir(Chi_10,Chi_10); \ permute##dir(Chi_10,Chi_10); \
permute##dir(Chi_11,Chi_11);\ permute##dir(Chi_11,Chi_11); \
permute##dir(Chi_12,Chi_12); permute##dir(Chi_12,Chi_12);
#endif #endif
@ -371,88 +371,91 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
result_32-= UChi_12; result_32-= UChi_12;
#define HAND_STENCIL_LEGB(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEGB(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ {int ptype; \
offset = SE->_offset; \ SE=st.GetEntry(ptype,DIR,ss); \
local = SE->_is_local; \ auto offset = SE->_offset; \
perm = SE->_permute; \ auto local = SE->_is_local; \
if ( local ) { \ auto perm = SE->_permute; \
LOAD_CHIMU(PERM); \ if ( local ) { \
PROJ; \ LOAD_CHIMU(PERM); \
if ( perm) { \ PROJ; \
PERMUTE_DIR(PERM); \ if ( perm) { \
} \ PERMUTE_DIR(PERM); \
} else { \ } \
LOAD_CHI; \ } else { \
} \ LOAD_CHI; \
acceleratorSynchronise(); \ } \
MULT_2SPIN(DIR); \ acceleratorSynchronise(); \
RECON; MULT_2SPIN(DIR); \
RECON; }
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
SE=&st_p[DIR+8*ss]; \ { SE=&st_p[DIR+8*ss]; \
ptype=st_perm[DIR]; \ auto ptype=st_perm[DIR]; \
offset = SE->_offset; \ auto offset = SE->_offset; \
local = SE->_is_local; \ auto local = SE->_is_local; \
perm = SE->_permute; \ auto perm = SE->_permute; \
if ( local ) { \ if ( local ) { \
LOAD_CHIMU(PERM); \ LOAD_CHIMU(PERM); \
PROJ; \ PROJ; \
if ( perm) { \ if ( perm) { \
PERMUTE_DIR(PERM); \ PERMUTE_DIR(PERM); \
} \ } \
} else { \ } else { \
LOAD_CHI; \ LOAD_CHI; \
} \ } \
acceleratorSynchronise(); \ acceleratorSynchronise(); \
MULT_2SPIN(DIR); \ MULT_2SPIN(DIR); \
RECON; RECON; }
#define HAND_STENCIL_LEGA(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEGA(PROJ,PERM,DIR,RECON) \
SE=&st_p[DIR+8*ss]; \ { SE=&st_p[DIR+8*ss]; \
ptype=st_perm[DIR]; \ auto ptype=st_perm[DIR]; \
/*SE=st.GetEntry(ptype,DIR,ss);*/ \ /*SE=st.GetEntry(ptype,DIR,ss);*/ \
offset = SE->_offset; \ auto offset = SE->_offset; \
perm = SE->_permute; \ auto perm = SE->_permute; \
LOAD_CHIMU(PERM); \ LOAD_CHIMU(PERM); \
PROJ; \ PROJ; \
MULT_2SPIN(DIR); \ MULT_2SPIN(DIR); \
RECON; RECON; }
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ { int ptype; \
offset = SE->_offset; \ SE=st.GetEntry(ptype,DIR,ss); \
local = SE->_is_local; \ auto offset = SE->_offset; \
perm = SE->_permute; \ auto local = SE->_is_local; \
if ( local ) { \ auto perm = SE->_permute; \
LOAD_CHIMU(PERM); \ if ( local ) { \
PROJ; \ LOAD_CHIMU(PERM); \
if ( perm) { \ PROJ; \
PERMUTE_DIR(PERM); \ if ( perm) { \
} \ PERMUTE_DIR(PERM); \
} else if ( st.same_node[DIR] ) { \ } \
LOAD_CHI; \ } else if ( st.same_node[DIR] ) { \
} \ LOAD_CHI; \
acceleratorSynchronise(); \ } \
if (local || st.same_node[DIR] ) { \ acceleratorSynchronise(); \
MULT_2SPIN(DIR); \ if (local || st.same_node[DIR] ) { \
RECON; \ MULT_2SPIN(DIR); \
} \ RECON; \
acceleratorSynchronise(); } \
acceleratorSynchronise(); }
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \ #define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \ { int ptype; \
offset = SE->_offset; \ SE=st.GetEntry(ptype,DIR,ss); \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \ auto offset = SE->_offset; \
LOAD_CHI; \ if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
MULT_2SPIN(DIR); \ LOAD_CHI; \
RECON; \ MULT_2SPIN(DIR); \
nmu++; \ RECON; \
} \ nmu++; \
acceleratorSynchronise(); } \
acceleratorSynchronise(); }
#define HAND_RESULT(ss) \ #define HAND_RESULT(ss) \
{ \ { \
SiteSpinor & ref (out[ss]); \ SiteSpinor & ref (out[ss]); \
coalescedWrite(ref()(0)(0),result_00,lane); \ coalescedWrite(ref()(0)(0),result_00,lane); \
coalescedWrite(ref()(0)(1),result_01,lane); \ coalescedWrite(ref()(0)(1),result_01,lane); \
coalescedWrite(ref()(0)(2),result_02,lane); \ coalescedWrite(ref()(0)(2),result_02,lane); \
@ -563,7 +566,6 @@ WilsonKernels<Impl>::HandDhopSiteSycl(StencilVector st_perm,StencilEntry *st_p,
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON); HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM); HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
@ -593,9 +595,7 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON); HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM); HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM); HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
@ -623,8 +623,6 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
StencilEntry *SE; StencilEntry *SE;
int offset,local,perm, ptype;
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON); HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM); HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM); HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
@ -640,8 +638,8 @@ template<class Impl> accelerator_inline void
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -652,7 +650,6 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
ZERO_RESULT; ZERO_RESULT;
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM); HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
@ -670,8 +667,8 @@ template<class Impl> accelerator_inline
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt; typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt;
@ -682,7 +679,6 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
StencilEntry *SE; StencilEntry *SE;
int offset,local,perm, ptype;
ZERO_RESULT; ZERO_RESULT;
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM); HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM); HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
@ -699,8 +695,8 @@ template<class Impl> accelerator_inline void
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -711,7 +707,7 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
int offset, ptype; // int offset, ptype;
StencilEntry *SE; StencilEntry *SE;
int nmu=0; int nmu=0;
ZERO_RESULT; ZERO_RESULT;
@ -730,8 +726,8 @@ template<class Impl> accelerator_inline
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
auto st_p = st._entries_p; // auto st_p = st._entries_p;
auto st_perm = st._permute_type; // auto st_perm = st._permute_type;
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt; typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt;
@ -742,7 +738,7 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldVi
HAND_DECLARATIONS(Simt); HAND_DECLARATIONS(Simt);
StencilEntry *SE; StencilEntry *SE;
int offset, ptype; // int offset, ptype;
int nmu=0; int nmu=0;
ZERO_RESULT; ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM); HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonKernelsInstantiationGparity.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION GparityWilsonImplDF

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonKernelsInstantiationGparity.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION GparityWilsonImplFH

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonTMFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION WilsonImplDF

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermionInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1 +0,0 @@
../WilsonTMFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION WilsonImplFH

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1 +0,0 @@
#define IMPLEMENTATION ZWilsonImplDF

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1 +0,0 @@
#define IMPLEMENTATION ZWilsonImplFH

View File

@ -9,8 +9,6 @@ STAG5_IMPL_LIST=""
WILSON_IMPL_LIST=" \ WILSON_IMPL_LIST=" \
WilsonImplF \ WilsonImplF \
WilsonImplD \ WilsonImplD \
WilsonImplFH \
WilsonImplDF \
WilsonAdjImplF \ WilsonAdjImplF \
WilsonAdjImplD \ WilsonAdjImplD \
WilsonTwoIndexSymmetricImplF \ WilsonTwoIndexSymmetricImplF \
@ -18,26 +16,17 @@ WILSON_IMPL_LIST=" \
WilsonTwoIndexAntiSymmetricImplF \ WilsonTwoIndexAntiSymmetricImplF \
WilsonTwoIndexAntiSymmetricImplD \ WilsonTwoIndexAntiSymmetricImplD \
GparityWilsonImplF \ GparityWilsonImplF \
GparityWilsonImplD \ GparityWilsonImplD "
GparityWilsonImplFH \
GparityWilsonImplDF"
DWF_IMPL_LIST=" \ DWF_IMPL_LIST=" \
WilsonImplF \ WilsonImplF \
WilsonImplD \ WilsonImplD \
WilsonImplFH \
WilsonImplDF \
ZWilsonImplF \ ZWilsonImplF \
ZWilsonImplD \ ZWilsonImplD "
ZWilsonImplFH \
ZWilsonImplDF "
GDWF_IMPL_LIST=" \ GDWF_IMPL_LIST=" \
GparityWilsonImplF \ GparityWilsonImplF \
GparityWilsonImplD \ GparityWilsonImplD "
GparityWilsonImplFH \
GparityWilsonImplDF"
IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST" IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST"

View File

@ -78,6 +78,8 @@ public:
typedef Lattice<SiteLink> LinkField; typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field; typedef Lattice<SiteField> Field;
typedef SU<Nrepresentation> Group;
// Guido: we can probably separate the types from the HMC functions // Guido: we can probably separate the types from the HMC functions
// this will create 2 kind of implementations // this will create 2 kind of implementations
// probably confusing the users // probably confusing the users
@ -118,7 +120,7 @@ public:
LinkField Pmu(P.Grid()); LinkField Pmu(P.Grid());
Pmu = Zero(); Pmu = Zero();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ; RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ;
Pmu = Pmu*scale; Pmu = Pmu*scale;
PokeIndex<LorentzIndex>(P, Pmu, mu); PokeIndex<LorentzIndex>(P, Pmu, mu);
@ -159,15 +161,15 @@ public:
} }
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::HotConfiguration(pRNG, U); Group::HotConfiguration(pRNG, U);
} }
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::TepidConfiguration(pRNG, U); Group::TepidConfiguration(pRNG, U);
} }
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::ColdConfiguration(pRNG, U); Group::ColdConfiguration(pRNG, U);
} }
}; };

View File

@ -1,61 +1,63 @@
Using HMC in Grid version 0.5.1 # Using HMC in Grid
These are the instructions to use the Generalised HMC on Grid version 0.5.1. These are the instructions to use the Generalised HMC on Grid as of commit `749b802`.
Disclaimer: GRID is still under active development so any information here can be changed in future releases. Disclaimer: Grid is still under active development so any information here can be changed in future releases.
Command line options ## Command line options
===================
(relevant file GenericHMCrunner.h) (relevant file `GenericHMCrunner.h`)
The initial configuration can be changed at the command line using The initial configuration can be changed at the command line using
--StartType <your choice> `--StartingType STARTING_TYPE`, where `STARTING_TYPE` is one of
valid choices, one among these `HotStart`, `ColdStart`, `TepidStart`, and `CheckpointStart`.
HotStart, ColdStart, TepidStart, CheckpointStart Default: `--StartingType HotStart`
default: HotStart
example Example:
./My_hmc_exec --StartType HotStart ```
./My_hmc_exec --StartingType HotStart
```
The CheckpointStart option uses the prefix for the configurations and rng seed files defined in your executable and the initial configuration is specified by The `CheckpointStart` option uses the prefix for the configurations and rng seed files defined in your executable and the initial configuration is specified by
--StartTrajectory <integer> `--StartingTrajectory STARTING_TRAJECTORY`, where `STARTING_TRAJECTORY` is an integer.
default: 0 Default: `--StartingTrajectory 0`
The number of trajectories for a specific run are specified at command line by The number of trajectories for a specific run are specified at command line by
--Trajectories <integer> `--Trajectories TRAJECTORIES`, where `TRAJECTORIES` is an integer.
default: 1 Default: `--Trajectories 1`
The number of thermalization steps (i.e. steps when the Metropolis acceptance check is turned off) is specified by The number of thermalization steps (i.e. steps when the Metropolis acceptance check is turned off) is specified by
--Thermalizations <integer> `--Thermalizations THERMALIZATIONS`, where `THERMALIZATIONS` is an integer.
default: 10 Default: `--Thermalizations 10`
Any other parameter is defined in the source for the executable. Any other parameter is defined in the source for the executable.
HMC controls ## HMC controls
===========
The lines The lines
```
std::vector<int> SerSeed({1, 2, 3, 4, 5}); std::vector<int> SerSeed({1, 2, 3, 4, 5});
std::vector<int> ParSeed({6, 7, 8, 9, 10}); std::vector<int> ParSeed({6, 7, 8, 9, 10});
```
define the seeds for the serial and the parallel RNG. define the seeds for the serial and the parallel RNG.
The line The line
```
TheHMC.MDparameters.set(20, 1.0);// MDsteps, traj length TheHMC.MDparameters.set(20, 1.0);// MDsteps, traj length
```
declares the number of molecular dynamics steps and the total trajectory length. declares the number of molecular dynamics steps and the total trajectory length.
Actions ## Actions
======
Action names are defined in the file Action names are defined in the directory `Grid/qcd/action`.
lib/qcd/Actions.h
Gauge actions list: Gauge actions list (from `Grid/qcd/action/gauge/Gauge.h`):
```
WilsonGaugeActionR; WilsonGaugeActionR;
WilsonGaugeActionF; WilsonGaugeActionF;
WilsonGaugeActionD; WilsonGaugeActionD;
@ -68,8 +70,9 @@ IwasakiGaugeActionD;
SymanzikGaugeActionR; SymanzikGaugeActionR;
SymanzikGaugeActionF; SymanzikGaugeActionF;
SymanzikGaugeActionD; SymanzikGaugeActionD;
```
```
ConjugateWilsonGaugeActionR; ConjugateWilsonGaugeActionR;
ConjugateWilsonGaugeActionF; ConjugateWilsonGaugeActionF;
ConjugateWilsonGaugeActionD; ConjugateWilsonGaugeActionD;
@ -82,26 +85,23 @@ ConjugateIwasakiGaugeActionD;
ConjugateSymanzikGaugeActionR; ConjugateSymanzikGaugeActionR;
ConjugateSymanzikGaugeActionF; ConjugateSymanzikGaugeActionF;
ConjugateSymanzikGaugeActionD; ConjugateSymanzikGaugeActionD;
```
Each of these action accepts one single parameter at creation time (beta).
Example for creating a Symanzik action with beta=4.0
```
SymanzikGaugeActionR(4.0)
```
Scalar actions list (from `Grid/qcd/action/scalar/Scalar.h`):
```
ScalarActionR; ScalarActionR;
ScalarActionF; ScalarActionF;
ScalarActionD; ScalarActionD;
```
The suffixes `R`, `F`, `D` in the action names refer to the `Real`
each of these action accept one single parameter at creation time (beta). (the precision is defined at compile time by the `--enable-precision` flag in the configure),
Example for creating a Symanzik action with beta=4.0 `Float` and `Double`, that force the precision of the action to be 32, 64 bit respectively.
SymanzikGaugeActionR(4.0)
The suffixes R,F,D in the action names refer to the Real
(the precision is defined at compile time by the --enable-precision flag in the configure),
Float and Double, that force the precision of the action to be 32, 64 bit respectively.

View File

@ -40,7 +40,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
// Dirac algebra adjoint operator (not in to overload other adj) // Dirac algebra adjoint operator (not in to overload other adj)
accelerator_inline Gamma adj(const Gamma &g) inline Gamma adj(const Gamma &g)
{ {
return Gamma (Gamma::adj[g.g]); return Gamma (Gamma::adj[g.g]);
} }
@ -48,7 +48,7 @@ accelerator_inline Gamma adj(const Gamma &g)
// Dirac algebra mutliplication operator // Dirac algebra mutliplication operator
accelerator_inline Gamma operator*(const Gamma &g1, const Gamma &g2) inline Gamma operator*(const Gamma &g1, const Gamma &g2)
{ {
return Gamma (Gamma::mul[g1.g][g2.g]); return Gamma (Gamma::mul[g1.g][g2.g]);
} }

View File

@ -2,14 +2,11 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc Source file: ./lib/serialisation/BaseIO.h
Copyright (C) 2015, 2020 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Michael Marshall <michael.marshall@ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,27 +22,14 @@ You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512 #include <Grid/GridCore.h>
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid)
#include "impl.h" std::uint64_t EigenIO::EigenResizeCounter(0);
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid); NAMESPACE_END(Grid)

View File

@ -9,6 +9,7 @@
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -30,6 +31,7 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk>
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H #ifndef GRID_SERIALISATION_ABSTRACT_READER_H
#define GRID_SERIALISATION_ABSTRACT_READER_H #define GRID_SERIALISATION_ABSTRACT_READER_H
#include <atomic>
#include <type_traits> #include <type_traits>
#include <Grid/tensors/Tensors.h> #include <Grid/tensors/Tensors.h>
#include <Grid/serialisation/VectorUtils.h> #include <Grid/serialisation/VectorUtils.h>
@ -110,6 +112,10 @@ namespace Grid {
template <typename ET> template <typename ET>
inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type
getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); } getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); }
// Counter for resized EigenTensors (poor man's substitute for allocator)
// Defined in BinaryIO.cc
extern std::uint64_t EigenResizeCounter;
} }
// Abstract writer/reader classes //////////////////////////////////////////// // Abstract writer/reader classes ////////////////////////////////////////////
@ -497,8 +503,14 @@ namespace Grid {
typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ) Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
{ {
#ifdef GRID_OMP
// The memory counter is the reason this must be done from the primary thread
assert(omp_in_parallel()==0 && "Deserialisation which resizes Eigen tensor must happen from primary thread");
#endif
EigenIO::EigenResizeCounter -= static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
//t.reshape( dims ); //t.reshape( dims );
t.resize( dims ); t.resize( dims );
EigenIO::EigenResizeCounter += static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
} }
template <typename T> template <typename T>

View File

@ -1,8 +1,39 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./Grid/serialisation/VectorUtils.h
Copyright (C) 2015
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
using namespace Grid; using namespace Grid;
#ifndef H5_NO_NAMESPACE #ifndef H5_NO_NAMESPACE
using namespace H5NS; using namespace H5NS; // Compile error here? Try adding --enable-cxx to hdf5 configure
#endif #endif
// Writer implementation /////////////////////////////////////////////////////// // Writer implementation ///////////////////////////////////////////////////////

Some files were not shown because too many files have changed in this diff Show More