1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Merge branch 'feature/staggering' into develop

This commit is contained in:
paboyle 2017-03-28 15:25:55 +09:00
commit 18bde08d1b
40 changed files with 3938 additions and 90 deletions

View File

@ -0,0 +1,134 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_staggered.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedRandomDevice();
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typename ImprovedStaggeredFermionR::ImplParams params;
FermionField src (&Grid); random(pRNG,src);
FermionField result(&Grid); result=zero;
FermionField ref(&Grid); ref=zero;
FermionField tmp(&Grid); tmp=zero;
FermionField err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
// Only one non-zero (y)
#if 0
Umu=zero;
Complex cone(1.0,0.0);
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
if(1) {
if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
// else { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
else { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
}
PokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
#endif
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
ref = zero;
/*
{ // Naive wilson implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
}
ref = -0.5*ref;
*/
RealD mass=0.1;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Ds.Dhop(src,result,0);
}
double t1=usecond();
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}

View File

@ -324,12 +324,15 @@ void Grid_init(int *argc,char ***argv)
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll;
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm;
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric;
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptGeneric;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
LebesgueOrder::UseLebesgueOrder=1;
@ -413,7 +416,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
#endif
#endif
BACKTRACE();
exit(0);
if ( si->si_signo != SIGTRAP ) exit(0);
return;
};

View File

@ -87,6 +87,7 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
assert(0);
@ -97,7 +98,7 @@ void CartesianCommunicator::Barrier(void){}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor ;}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor; }
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
source =0;

View File

@ -45,6 +45,10 @@ namespace QCD {
WilsonImplParams() : overlapCommsCompute(false) {};
};
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct OneFlavourRationalParams {
RealD lo;
RealD hi;

View File

@ -53,6 +53,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h>
#include <Grid/qcd/action/fermion/FermionOperator.h>
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
#include <Grid/qcd/action/fermion/StaggeredKernels.h> //used by all wilson type fermions
////////////////////////////////////////////
// Gauge Actions
@ -108,6 +109,14 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
////////////////////////////////////////////////////////////////////////////////////////////////////
#define FermOpStaggeredTemplateInstantiate(A) \
template class A<StaggeredImplF>; \
template class A<StaggeredImplD>;
#define FermOpStaggeredVec5dTemplateInstantiate(A) \
template class A<StaggeredVec5dImplF>; \
template class A<StaggeredVec5dImplD>;
#define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \
template class A<WilsonImplD>; \
@ -147,6 +156,9 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
//#include <Grid/qcd/action/fermion/CloverFermion.h>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion.h>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
@ -268,6 +280,17 @@ typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD;
}}

View File

@ -235,11 +235,13 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
/////////////////////////////////////////////////
// Make the doubled gauge field a *scalar*
/////////////////////////////////////////////////
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
@ -271,11 +273,11 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
{
SiteScalarGaugeField ScalarUmu;
SiteScalarGaugeField ScalarUmu;
SiteDoubledGaugeField ScalarUds;
GaugeLinkField U(Umu._grid);
GaugeField Uadj(Umu._grid);
GaugeField Uadj(Umu._grid);
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
U = adj(Cshift(U, mu, -1));
@ -356,7 +358,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
StencilImpl &St) {
typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
vobj vtmp;
sobj stmp;
@ -512,6 +514,316 @@ PARALLEL_FOR_LOOP
};
/////////////////////////////////////////////////////////////////////////////
// Single flavour one component spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation >
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iVector<iScalar<iVector<vtype, Dimension> >, Ngp>;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplScalar<Simd> SiteComplex;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteComplex> ComplexField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef SimpleCompressor<SiteSpinor> Compressor;
typedef StaggeredImplParams ImplParams;
typedef CartesianStencil<SiteSpinor, SiteSpinor> StencilImpl;
ImplParams Params;
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
inline void multLink(SiteSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteSpinor &chi,
int mu){
mult(&phi(), &U(mu), &chi());
}
inline void multLinkAdd(SiteSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteSpinor &chi,
int mu){
mac(&phi(), &U(mu), &chi());
}
template <class ref>
inline void loadLinkElement(Simd &reg, ref &memory) {
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
const GaugeField &Uthin,
const GaugeField &Ufat) {
conformable(Uds._grid, GaugeGrid);
conformable(Uthin._grid, GaugeGrid);
conformable(Ufat._grid, GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField UU(GaugeGrid);
GaugeLinkField UUU(GaugeGrid);
GaugeLinkField Udag(GaugeGrid);
GaugeLinkField UUUdag(GaugeGrid);
for (int mu = 0; mu < Nd; mu++) {
// Staggered Phase.
Lattice<iScalar<vInteger> > coor(GaugeGrid);
Lattice<iScalar<vInteger> > x(GaugeGrid); LatticeCoordinate(x,0);
Lattice<iScalar<vInteger> > y(GaugeGrid); LatticeCoordinate(y,1);
Lattice<iScalar<vInteger> > z(GaugeGrid); LatticeCoordinate(z,2);
Lattice<iScalar<vInteger> > t(GaugeGrid); LatticeCoordinate(t,3);
Lattice<iScalar<vInteger> > lin_z(GaugeGrid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(GaugeGrid); lin_t=x+y+z;
ComplexField phases(GaugeGrid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
// 1 hop based on fat links
U = PeekIndex<LorentzIndex>(Ufat, mu);
Udag = adj( Cshift(U, mu, -1));
U = U *phases;
Udag = Udag *phases;
PokeIndex<LorentzIndex>(Uds, U, mu);
PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
UU = Gimpl::CovShiftForward(U,mu,U);
UUU= Gimpl::CovShiftForward(U,mu,UU);
UUUdag = adj( Cshift(UUU, mu, -3));
UUU = UUU *phases;
UUUdag = UUUdag *phases;
PokeIndex<LorentzIndex>(UUUds, UUU, mu);
PokeIndex<LorentzIndex>(UUUds, UUUdag, mu+4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat._grid);
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
assert (0);
// Must never hit
}
};
/////////////////////////////////////////////////////////////////////////////
// Single flavour one component spinors with colour index. 5d vec
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation >
class StaggeredVec5dImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
// Make the doubled gauge field a *scalar*
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef iImplScalar<Simd> SiteComplex;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteComplex> ComplexField;
typedef Lattice<SiteSpinor> FermionField;
typedef SimpleCompressor<SiteSpinor> Compressor;
typedef StaggeredImplParams ImplParams;
typedef CartesianStencil<SiteSpinor, SiteSpinor> StencilImpl;
ImplParams Params;
StaggeredVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
template <class ref>
inline void loadLinkElement(Simd &reg, ref &memory) {
vsplat(reg, memory);
}
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu) {
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
}
inline void multLinkAdd(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu) {
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mac(&phi(), &UU(), &chi());
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
const GaugeField &Uthin,
const GaugeField &Ufat)
{
GridBase * InputGrid = Uthin._grid;
conformable(InputGrid,Ufat._grid);
GaugeLinkField U(InputGrid);
GaugeLinkField UU(InputGrid);
GaugeLinkField UUU(InputGrid);
GaugeLinkField Udag(InputGrid);
GaugeLinkField UUUdag(InputGrid);
for (int mu = 0; mu < Nd; mu++) {
// Staggered Phase.
Lattice<iScalar<vInteger> > coor(InputGrid);
Lattice<iScalar<vInteger> > x(InputGrid); LatticeCoordinate(x,0);
Lattice<iScalar<vInteger> > y(InputGrid); LatticeCoordinate(y,1);
Lattice<iScalar<vInteger> > z(InputGrid); LatticeCoordinate(z,2);
Lattice<iScalar<vInteger> > t(InputGrid); LatticeCoordinate(t,3);
Lattice<iScalar<vInteger> > lin_z(InputGrid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(InputGrid); lin_t=x+y+z;
ComplexField phases(InputGrid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
// 1 hop based on fat links
U = PeekIndex<LorentzIndex>(Ufat, mu);
Udag = adj( Cshift(U, mu, -1));
U = U *phases;
Udag = Udag *phases;
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
SiteScalarGaugeLink ScalarU;
SiteDoubledGaugeField ScalarUds;
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUds, Uds, lcoor);
peekLocalSite(ScalarU, U, lcoor);
ScalarUds(mu) = ScalarU();
peekLocalSite(ScalarU, Udag, lcoor);
ScalarUds(mu + 4) = ScalarU();
pokeLocalSite(ScalarUds, Uds, lcoor);
}
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
UU = Gimpl::CovShiftForward(U,mu,U);
UUU= Gimpl::CovShiftForward(U,mu,UU);
UUUdag = adj( Cshift(UUU, mu, -3));
UUU = UUU *phases;
UUUdag = UUUdag *phases;
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
SiteScalarGaugeLink ScalarU;
SiteDoubledGaugeField ScalarUds;
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUds, UUUds, lcoor);
peekLocalSite(ScalarU, UUU, lcoor);
ScalarUds(mu) = ScalarU();
peekLocalSite(ScalarU, UUUdag, lcoor);
ScalarUds(mu + 4) = ScalarU();
pokeLocalSite(ScalarUds, UUUds, lcoor);
}
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
assert (0);
}
};
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
@ -540,6 +852,14 @@ PARALLEL_FOR_LOOP
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
}}
#endif

View File

@ -0,0 +1,356 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
Copyright (C) 2015
Author: Azusa Yamaguchi, Peter Boyle
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
namespace Grid {
namespace QCD {
const std::vector<int>
ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
const std::vector<int>
ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
/////////////////////////////////
// Constructor and gauge import
/////////////////////////////////
template <class Impl>
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p)
: Kernels(p),
_grid(&Fgrid),
_cbgrid(&Hgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements),
StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions, displacements), // source is Odd
mass(_mass),
c1(_c1),
c2(_c2),
u0(_u0),
Lebesgue(_grid),
LebesgueEvenOdd(_cbgrid),
Umu(&Fgrid),
UmuEven(&Hgrid),
UmuOdd(&Hgrid),
UUUmu(&Fgrid),
UUUmuEven(&Hgrid),
UUUmuOdd(&Hgrid)
{
// Allocate the required comms buffer
ImportGauge(_Uthin,_Ufat);
}
////////////////////////////////////////////////////////////
// Momentum space propagator should be
// https://arxiv.org/pdf/hep-lat/9712010.pdf
//
// mom space action.
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
//
// must track through staggered flavour/spin reduction in literature to
// turn to free propagator for the one component chi field, a la page 4/5
// of above link to implmement fourier based solver.
////////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin)
{
ImportGauge(_Uthin,_Uthin);
};
template <class Impl>
void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat)
{
GaugeLinkField U(GaugeGrid());
////////////////////////////////////////////////////////
// Double Store should take two fields for Naik and one hop separately.
////////////////////////////////////////////////////////
Impl::DoubleStore(GaugeGrid(), UUUmu, Umu, _Uthin, _Ufat );
////////////////////////////////////////////////////////
// Apply scale factors to get the right fermion Kinetic term
// Could pass coeffs into the double store to save work.
// 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) )
////////////////////////////////////////////////////////
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
PokeIndex<LorentzIndex>(Umu, U*( 0.5*c1/u0), mu );
U = PeekIndex<LorentzIndex>(Umu, mu+4);
PokeIndex<LorentzIndex>(Umu, U*(-0.5*c1/u0), mu+4);
U = PeekIndex<LorentzIndex>(UUUmu, mu);
PokeIndex<LorentzIndex>(UUUmu, U*( 0.5*c2/u0/u0/u0), mu );
U = PeekIndex<LorentzIndex>(UUUmu, mu+4);
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
}
pickCheckerboard(Even, UmuEven, Umu);
pickCheckerboard(Odd, UmuOdd , Umu);
pickCheckerboard(Even, UUUmuEven, UUUmu);
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
}
/////////////////////////////
// Implement the interface
/////////////////////////////
template <class Impl>
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerNo);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerYes);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
}
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
}
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
typename FermionField::scalar_type scal(mass);
out = scal * in;
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Mooee(in, out);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out = (1.0 / (mass)) * in;
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
FermionField &out) {
out.checkerboard = in.checkerboard;
MooeeInv(in, out);
}
///////////////////////////////////
// Internal
///////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU,
GaugeField & mat,
const FermionField &A, const FermionField &B, int dag) {
assert((dag == DaggerNo) || (dag == DaggerYes));
Compressor compressor;
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
Atilde = A;
st.HaloExchange(B, compressor);
for (int mu = 0; mu < Nd; mu++) {
////////////////////////
// Call the single hop
////////////////////////
PARALLEL_FOR_LOOP
for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DhopDir(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1);
}
// Force in three link terms
//
// Impl::InsertForce4D(mat, Btilde, Atilde, mu);
//
// dU_ac(x)/dt = i p_ab U_bc(x)
//
// => dS_f/dt = dS_f/dU_ac(x) . dU_ac(x)/dt = i p_ab U_bc(x) dS_f/dU_ac(x)
//
// One link: form fragments S_f = A U B
//
// write Btilde = U(x) B(x+mu)
//
// mat+= TraceIndex<SpinIndex>(outerProduct(Btilde,A));
//
// Three link: form fragments S_f = A UUU B
//
// mat+= outer ( A, UUUB) <-- Best take DhopDeriv with one linke or identity matrix
// mat+= outer ( AU, UUB) <-- and then use covariant cshift?
// mat+= outer ( AUU, UB) <-- Returned from call to DhopDir
assert(0);// need to figure out the force interface with a blasted three link term.
}
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _grid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
mat.checkerboard = U.checkerboard;
DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);
mat.checkerboard = Even;
DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
conformable(in._grid, _grid); // verifies full grid
conformable(in._grid, out._grid);
out.checkerboard = in.checkerboard;
DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
assert(in.checkerboard == Even);
out.checkerboard = Odd;
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
assert(in.checkerboard == Odd);
out.checkerboard = Even;
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
DhopDir(in, out, dir, disp);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
Compressor compressor;
Stencil.HaloExchange(in, compressor);
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp);
}
};
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out, int dag) {
assert((dag == DaggerNo) || (dag == DaggerYes));
Compressor compressor;
st.HaloExchange(in, compressor);
if (dag == DaggerYes) {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
} else {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
}
};
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion);
//AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion);
//TwoIndexFermOpTemplateInstantiate(ImprovedStaggeredFermion);
}}

View File

@ -0,0 +1,155 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ImprovedStaggered.h
Copyright (C) 2015
Author: Azusa Yamaguchi, Peter Boyle
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_IMPR_STAG_FERMION_H
#define GRID_QCD_IMPR_STAG_FERMION_H
namespace Grid {
namespace QCD {
class ImprovedStaggeredFermionStatic {
public:
static const std::vector<int> directions;
static const std::vector<int> displacements;
static const int npoint = 16;
};
template <class Impl>
class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedStaggeredFermionStatic {
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _grid; }
GridBase *GaugeRedBlackGrid(void) { return _cbgrid; }
GridBase *FermionGrid(void) { return _grid; }
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
//////////////////////////////////////////////////////////////////
// override multiply; cut number routines if pass dagger argument
// and also make interface more uniformly consistent
//////////////////////////////////////////////////////////////////
RealD M(const FermionField &in, FermionField &out);
RealD Mdag(const FermionField &in, FermionField &out);
/////////////////////////////////////////////////////////
// half checkerboard operations
/////////////////////////////////////////////////////////
void Meooe(const FermionField &in, FermionField &out);
void MeooeDag(const FermionField &in, FermionField &out);
void Mooee(const FermionField &in, FermionField &out);
void MooeeDag(const FermionField &in, FermionField &out);
void MooeeInv(const FermionField &in, FermionField &out);
void MooeeInvDag(const FermionField &in, FermionField &out);
////////////////////////
// Derivative interface
////////////////////////
// Interface calls an internal routine
void DhopDeriv (GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
///////////////////////////////////////////////////////////////
// non-hermitian hopping term; half cb or both
///////////////////////////////////////////////////////////////
void Dhop (const FermionField &in, FermionField &out, int dag);
void DhopOE(const FermionField &in, FermionField &out, int dag);
void DhopEO(const FermionField &in, FermionField &out, int dag);
///////////////////////////////////////////////////////////////
// Multigrid assistance; force term uses too
///////////////////////////////////////////////////////////////
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
///////////////////////////////////////////////////////////////
// Extra methods added by derived
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl &st,
DoubledGaugeField &U,DoubledGaugeField &UUU,
GaugeField &mat,
const FermionField &A, const FermionField &B, int dag);
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
const FermionField &in, FermionField &out, int dag);
// Constructor
ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0,
const ImplParams &p = ImplParams());
// DoubleStore impl dependent
void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat);
void ImportGauge(const GaugeField &_Uthin);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
// protected:
public:
// any other parameters of action ???
RealD mass;
RealD u0;
RealD c1;
RealD c2;
GridBase *_grid;
GridBase *_cbgrid;
// Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
DoubledGaugeField UUUmu;
DoubledGaugeField UUUmuEven;
DoubledGaugeField UUUmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
};
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
}
}
#endif

View File

@ -0,0 +1,356 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <PerfCount.h>
namespace Grid {
namespace QCD {
// S-direction is INNERMOST and takes no part in the parity.
const std::vector<int>
ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4});
const std::vector<int>
ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
// 5d lattice for DWF.
template<class Impl>
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,
RealD _c1,RealD _c2, RealD _u0,
const ImplParams &p) :
Kernels(p),
_FiveDimGrid (&FiveDimGrid),
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
_FourDimGrid (&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
mass(_mass),
c1(_c1),
c2(_c2),
u0(_u0),
Umu(_FourDimGrid),
UmuEven(_FourDimRedBlackGrid),
UmuOdd (_FourDimRedBlackGrid),
UUUmu(_FourDimGrid),
UUUmuEven(_FourDimRedBlackGrid),
UUUmuOdd(_FourDimRedBlackGrid),
Lebesgue(_FourDimGrid),
LebesgueEvenOdd(_FourDimRedBlackGrid)
{
// some assertions
assert(FiveDimGrid._ndimension==5);
assert(FourDimGrid._ndimension==4);
assert(FourDimRedBlackGrid._ndimension==4);
assert(FiveDimRedBlackGrid._ndimension==5);
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
// extent of fifth dim and not spread out
Ls=FiveDimGrid._fdimensions[0];
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimRedBlackGrid._processors[0] ==1);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<4;d++){
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
}
if (Impl::LsVectorised) {
int nsimd = Simd::Nsimd();
// Dimension zero of the five-d is the Ls direction
assert(FiveDimGrid._simd_layout[0] ==nsimd);
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
for(int d=0;d<4;d++){
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
}
} else {
// Dimension zero of the five-d is the Ls direction
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
assert(FiveDimGrid._simd_layout[0] ==1);
}
// Allocate the required comms buffer
ImportGauge(_Uthin,_Ufat);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin)
{
ImportGauge(_Uthin,_Uthin);
};
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat)
{
////////////////////////////////////////////////////////
// Double Store should take two fields for Naik and one hop separately.
////////////////////////////////////////////////////////
Impl::DoubleStore(GaugeGrid(), UUUmu, Umu, _Uthin, _Ufat );
////////////////////////////////////////////////////////
// Apply scale factors to get the right fermion Kinetic term
// Could pass coeffs into the double store to save work.
// 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) )
////////////////////////////////////////////////////////
for (int mu = 0; mu < Nd; mu++) {
auto U = PeekIndex<LorentzIndex>(Umu, mu);
PokeIndex<LorentzIndex>(Umu, U*( 0.5*c1/u0), mu );
U = PeekIndex<LorentzIndex>(Umu, mu+4);
PokeIndex<LorentzIndex>(Umu, U*(-0.5*c1/u0), mu+4);
U = PeekIndex<LorentzIndex>(UUUmu, mu);
PokeIndex<LorentzIndex>(UUUmu, U*( 0.5*c2/u0/u0/u0), mu );
U = PeekIndex<LorentzIndex>(UUUmu, mu+4);
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
}
pickCheckerboard(Even, UmuEven, Umu);
pickCheckerboard(Odd, UmuOdd , Umu);
pickCheckerboard(Even, UUUmuEven, UUUmu);
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
{
int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
// we drop off the innermost fifth dimension
Compressor compressor;
Stencil.HaloExchange(in,compressor);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Umu._grid->oSites();ss++){
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp);
}
}
};
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
// No force terms in multi-rhs solver staggered
assert(0);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
assert(0);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,DoubledGaugeField & UUU,
const FermionField &in, FermionField &out,int dag)
{
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
st.HaloExchange(in,compressor);
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) {
PARALLEL_FOR_LOOP
for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss;
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
}
} else {
PARALLEL_FOR_LOOP
for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
}
}
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
{
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{
conformable(in._grid,FermionGrid()); // verifies full grid
conformable(in._grid,out._grid);
out.checkerboard = in.checkerboard;
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
}
/////////////////////////////////////////////////////////////////////////
// Implement the general interface. Here we use SAME mass on all slices
/////////////////////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
DhopDir(in, out, dir, disp);
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerNo);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerYes);
return axpy_norm(out, mass, in, out);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
}
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
}
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
typename FermionField::scalar_type scal(mass);
out = scal * in;
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Mooee(in, out);
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out = (1.0 / (mass)) * in;
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
FermionField &out) {
out.checkerboard = in.checkerboard;
MooeeInv(in, out);
}
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion5D);
FermOpStaggeredVec5dTemplateInstantiate(ImprovedStaggeredFermion5D);
}}

View File

@ -0,0 +1,164 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: AzusaYamaguchi <ayamaguc@staffmail.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H
#define GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H
namespace Grid {
namespace QCD {
////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
////////////////////////////////////////////////////////////////////////////////
class ImprovedStaggeredFermion5DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
const int npoint = 16;
};
template<class Impl>
class ImprovedStaggeredFermion5D : public StaggeredKernels<Impl>, public ImprovedStaggeredFermion5DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef StaggeredKernels<Impl> Kernels;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
// full checkerboard operations; leave unimplemented as abstract for now
RealD M (const FermionField &in, FermionField &out);
RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operations
void Meooe (const FermionField &in, FermionField &out);
void Mooee (const FermionField &in, FermionField &out);
void MooeeInv (const FermionField &in, FermionField &out);
void MeooeDag (const FermionField &in, FermionField &out);
void MooeeDag (const FermionField &in, FermionField &out);
void MooeeInvDag (const FermionField &in, FermionField &out);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
// These can be overridden by fancy 5d chiral action
void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
DoubledGaugeField & UUU,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,
LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out,
int dag);
// Constructors
ImprovedStaggeredFermion5D(GaugeField &_Uthin,
GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0,
const ImplParams &p= ImplParams());
// DoubleStore
void ImportGauge(const GaugeField &_U);
void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
RealD mass;
RealD c1;
RealD c2;
RealD u0;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
DoubledGaugeField UUUmu;
DoubledGaugeField UUUmuEven;
DoubledGaugeField UUUmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
};
}}
#endif

View File

@ -0,0 +1,273 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Azusa Yamaguchi, Peter Boyle
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
namespace Grid {
namespace QCD {
int StaggeredKernelsStatic::Opt;
template <class Impl>
StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteSpinor *buf, int sF,
int sU, const FermionField &in, SiteSpinor &out,int threeLink) {
const SiteSpinor *chi_p;
SiteSpinor chi;
SiteSpinor Uchi;
StencilEntry *SE;
int ptype;
int skew = 0;
if (threeLink) skew=8;
///////////////////////////
// Xp
///////////////////////////
SE = st.GetEntry(ptype, Xp+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp);
///////////////////////////
// Yp
///////////////////////////
SE = st.GetEntry(ptype, Yp+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Yp);
///////////////////////////
// Zp
///////////////////////////
SE = st.GetEntry(ptype, Zp+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zp);
///////////////////////////
// Tp
///////////////////////////
SE = st.GetEntry(ptype, Tp+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tp);
///////////////////////////
// Xm
///////////////////////////
SE = st.GetEntry(ptype, Xm+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Xm);
///////////////////////////
// Ym
///////////////////////////
SE = st.GetEntry(ptype, Ym+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Ym);
///////////////////////////
// Zm
///////////////////////////
SE = st.GetEntry(ptype, Zm+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zm);
///////////////////////////
// Tm
///////////////////////////
SE = st.GetEntry(ptype, Tm+skew, sF);
if (SE->_is_local) {
if (SE->_permute) {
chi_p = &chi;
permute(chi, in._odata[SE->_offset], ptype);
} else {
chi_p = &in._odata[SE->_offset];
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tm);
vstream(out, Uchi);
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out) {
SiteSpinor naik;
SiteSpinor naive;
int oneLink =0;
int threeLink=1;
int dag=1;
switch(Opt) {
#ifdef AVX512
//FIXME; move the sign into the Asm routine
case OptInlineAsm:
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
for(int s=0;s<LLs;s++) {
int sF=s+LLs*sU;
out._odata[sF]=-out._odata[sF];
}
break;
#endif
case OptHandUnroll:
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
break;
case OptGeneric:
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
out._odata[sF] =-naive-naik;
}
break;
default:
assert(0);
break;
}
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
{
int oneLink =0;
int threeLink=1;
SiteSpinor naik;
SiteSpinor naive;
int dag=0;
switch(Opt) {
#ifdef AVX512
case OptInlineAsm:
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
break;
#endif
case OptHandUnroll:
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
break;
case OptGeneric:
for(int s=0;s<LLs;s++){
int sF=LLs*sU+s;
// assert(sF<in._odata.size());
// assert(sU< U._odata.size());
// assert(sF>=0); assert(sU>=0);
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
out._odata[sF] =naive+naik;
}
break;
default:
assert(0);
break;
}
};
template <class Impl>
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int disp)
{
// Disp should be either +1,-1,+3,-3
// What about "dag" ?
// Because we work out pU . dS/dU
// U
assert(0);
}
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
}}

View File

@ -0,0 +1,83 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/StaggeredKernels.h
Copyright (C) 2015
Author: Azusa Yamaguchi, Peter Boyle
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_STAGGERED_KERNELS_H
#define GRID_QCD_STAGGERED_KERNELS_H
namespace Grid {
namespace QCD {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Staggered stencil for a single site.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class StaggeredKernelsStatic {
public:
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
// S-direction is INNERMOST and takes no part in the parity.
static int Opt; // these are a temporary hack
};
template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , public StaggeredKernelsStatic {
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
public:
void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink);
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink);
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf,
int LLs, int sU, const FermionField &in, FermionField &out, int dag);
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf,
int LLs, int sU, const FermionField &in, FermionField &out);
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
int LLs, int sU, const FermionField &in, FermionField &out);
public:
StaggeredKernels(const ImplParams &p = ImplParams());
};
}}
#endif

View File

@ -0,0 +1,910 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/StaggerdKernelsHand.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <simd/Intel512common.h>
#include <simd/Intel512avx.h>
// Interleave operations from two directions
// This looks just like a 2 spin multiply and reuse same sequence from the Wilson
// Kernel. But the spin index becomes a mu index instead.
#define Chi_00 %zmm0
#define Chi_01 %zmm1
#define Chi_02 %zmm2
#define Chi_10 %zmm3
#define Chi_11 %zmm4
#define Chi_12 %zmm5
#define Chi_20 %zmm6
#define Chi_21 %zmm7
#define Chi_22 %zmm8
#define Chi_30 %zmm9
#define Chi_31 %zmm10
#define Chi_32 %zmm11
#define UChi_00 %zmm12
#define UChi_01 %zmm13
#define UChi_02 %zmm14
#define UChi_10 %zmm15
#define UChi_11 %zmm16
#define UChi_12 %zmm17
#define UChi_20 %zmm18
#define UChi_21 %zmm19
#define UChi_22 %zmm20
#define UChi_30 %zmm21
#define UChi_31 %zmm22
#define UChi_32 %zmm23
#define pChi_00 %%zmm0
#define pChi_01 %%zmm1
#define pChi_02 %%zmm2
#define pChi_10 %%zmm3
#define pChi_11 %%zmm4
#define pChi_12 %%zmm5
#define pChi_20 %%zmm6
#define pChi_21 %%zmm7
#define pChi_22 %%zmm8
#define pChi_30 %%zmm9
#define pChi_31 %%zmm10
#define pChi_32 %%zmm11
#define pUChi_00 %%zmm12
#define pUChi_01 %%zmm13
#define pUChi_02 %%zmm14
#define pUChi_10 %%zmm15
#define pUChi_11 %%zmm16
#define pUChi_12 %%zmm17
#define pUChi_20 %%zmm18
#define pUChi_21 %%zmm19
#define pUChi_22 %%zmm20
#define pUChi_30 %%zmm21
#define pUChi_31 %%zmm22
#define pUChi_32 %%zmm23
#define T0 %zmm24
#define T1 %zmm25
#define T2 %zmm26
#define T3 %zmm27
#define Z00 %zmm26
#define Z10 %zmm27
#define Z0 Z00
#define Z1 %zmm28
#define Z2 %zmm29
#define Z3 %zmm30
#define Z4 %zmm31
#define Z5 Chi_31
#define Z6 Chi_32
#define MULT_ADD_LS(g0,g1,g2,g3) \
asm ( "movq %0, %%r8 \n\t" \
"movq %1, %%r9 \n\t" \
"movq %2, %%r10 \n\t" \
"movq %3, %%r11 \n\t" : : "r"(g0), "r"(g1), "r"(g2), "r"(g3) : "%r8","%r9","%r10","%r11" );\
asm ( \
VSHUF(Chi_00,T0) VSHUF(Chi_10,T1) \
VSHUF(Chi_20,T2) VSHUF(Chi_30,T3) \
VMADDSUBIDUP(0,%r8,T0,UChi_00) VMADDSUBIDUP(0,%r9,T1,UChi_10) \
VMADDSUBIDUP(3,%r8,T0,UChi_01) VMADDSUBIDUP(3,%r9,T1,UChi_11) \
VMADDSUBIDUP(6,%r8,T0,UChi_02) VMADDSUBIDUP(6,%r9,T1,UChi_12) \
VMADDSUBIDUP(0,%r10,T2,UChi_20) VMADDSUBIDUP(0,%r11,T3,UChi_30) \
VMADDSUBIDUP(3,%r10,T2,UChi_21) VMADDSUBIDUP(3,%r11,T3,UChi_31) \
VMADDSUBIDUP(6,%r10,T2,UChi_22) VMADDSUBIDUP(6,%r11,T3,UChi_32) \
VMADDSUBRDUP(0,%r8,Chi_00,UChi_00) VMADDSUBRDUP(0,%r9,Chi_10,UChi_10) \
VMADDSUBRDUP(3,%r8,Chi_00,UChi_01) VMADDSUBRDUP(3,%r9,Chi_10,UChi_11) \
VMADDSUBRDUP(6,%r8,Chi_00,UChi_02) VMADDSUBRDUP(6,%r9,Chi_10,UChi_12) \
VMADDSUBRDUP(0,%r10,Chi_20,UChi_20) VMADDSUBRDUP(0,%r11,Chi_30,UChi_30) \
VMADDSUBRDUP(3,%r10,Chi_20,UChi_21) VMADDSUBRDUP(3,%r11,Chi_30,UChi_31) \
VMADDSUBRDUP(6,%r10,Chi_20,UChi_22) VMADDSUBRDUP(6,%r11,Chi_30,UChi_32) \
VSHUF(Chi_01,T0) VSHUF(Chi_11,T1) \
VSHUF(Chi_21,T2) VSHUF(Chi_31,T3) \
VMADDSUBIDUP(1,%r8,T0,UChi_00) VMADDSUBIDUP(1,%r9,T1,UChi_10) \
VMADDSUBIDUP(4,%r8,T0,UChi_01) VMADDSUBIDUP(4,%r9,T1,UChi_11) \
VMADDSUBIDUP(7,%r8,T0,UChi_02) VMADDSUBIDUP(7,%r9,T1,UChi_12) \
VMADDSUBIDUP(1,%r10,T2,UChi_20) VMADDSUBIDUP(1,%r11,T3,UChi_30) \
VMADDSUBIDUP(4,%r10,T2,UChi_21) VMADDSUBIDUP(4,%r11,T3,UChi_31) \
VMADDSUBIDUP(7,%r10,T2,UChi_22) VMADDSUBIDUP(7,%r11,T3,UChi_32) \
VMADDSUBRDUP(1,%r8,Chi_01,UChi_00) VMADDSUBRDUP(1,%r9,Chi_11,UChi_10) \
VMADDSUBRDUP(4,%r8,Chi_01,UChi_01) VMADDSUBRDUP(4,%r9,Chi_11,UChi_11) \
VMADDSUBRDUP(7,%r8,Chi_01,UChi_02) VMADDSUBRDUP(7,%r9,Chi_11,UChi_12) \
VMADDSUBRDUP(1,%r10,Chi_21,UChi_20) VMADDSUBRDUP(1,%r11,Chi_31,UChi_30) \
VMADDSUBRDUP(4,%r10,Chi_21,UChi_21) VMADDSUBRDUP(4,%r11,Chi_31,UChi_31) \
VMADDSUBRDUP(7,%r10,Chi_21,UChi_22) VMADDSUBRDUP(7,%r11,Chi_31,UChi_32) \
VSHUF(Chi_02,T0) VSHUF(Chi_12,T1) \
VSHUF(Chi_22,T2) VSHUF(Chi_32,T3) \
VMADDSUBIDUP(2,%r8,T0,UChi_00) VMADDSUBIDUP(2,%r9,T1,UChi_10) \
VMADDSUBIDUP(5,%r8,T0,UChi_01) VMADDSUBIDUP(5,%r9,T1,UChi_11) \
VMADDSUBIDUP(8,%r8,T0,UChi_02) VMADDSUBIDUP(8,%r9,T1,UChi_12) \
VMADDSUBIDUP(2,%r10,T2,UChi_20) VMADDSUBIDUP(2,%r11,T3,UChi_30) \
VMADDSUBIDUP(5,%r10,T2,UChi_21) VMADDSUBIDUP(5,%r11,T3,UChi_31) \
VMADDSUBIDUP(8,%r10,T2,UChi_22) VMADDSUBIDUP(8,%r11,T3,UChi_32) \
VMADDSUBRDUP(2,%r8,Chi_02,UChi_00) VMADDSUBRDUP(2,%r9,Chi_12,UChi_10) \
VMADDSUBRDUP(5,%r8,Chi_02,UChi_01) VMADDSUBRDUP(5,%r9,Chi_12,UChi_11) \
VMADDSUBRDUP(8,%r8,Chi_02,UChi_02) VMADDSUBRDUP(8,%r9,Chi_12,UChi_12) \
VMADDSUBRDUP(2,%r10,Chi_22,UChi_20) VMADDSUBRDUP(2,%r11,Chi_32,UChi_30) \
VMADDSUBRDUP(5,%r10,Chi_22,UChi_21) VMADDSUBRDUP(5,%r11,Chi_32,UChi_31) \
VMADDSUBRDUP(8,%r10,Chi_22,UChi_22) VMADDSUBRDUP(8,%r11,Chi_32,UChi_32) );
#define MULT_LS(g0,g1,g2,g3) \
asm ( "movq %0, %%r8 \n\t" \
"movq %1, %%r9 \n\t" \
"movq %2, %%r10 \n\t" \
"movq %3, %%r11 \n\t" : : "r"(g0), "r"(g1), "r"(g2), "r"(g3) : "%r8","%r9","%r10","%r11" );\
asm ( \
VSHUF(Chi_00,T0) VSHUF(Chi_10,T1) \
VSHUF(Chi_20,T2) VSHUF(Chi_30,T3) \
VMULIDUP(0,%r8,T0,UChi_00) VMULIDUP(0,%r9,T1,UChi_10) \
VMULIDUP(3,%r8,T0,UChi_01) VMULIDUP(3,%r9,T1,UChi_11) \
VMULIDUP(6,%r8,T0,UChi_02) VMULIDUP(6,%r9,T1,UChi_12) \
VMULIDUP(0,%r10,T2,UChi_20) VMULIDUP(0,%r11,T3,UChi_30) \
VMULIDUP(3,%r10,T2,UChi_21) VMULIDUP(3,%r11,T3,UChi_31) \
VMULIDUP(6,%r10,T2,UChi_22) VMULIDUP(6,%r11,T3,UChi_32) \
VMADDSUBRDUP(0,%r8,Chi_00,UChi_00) VMADDSUBRDUP(0,%r9,Chi_10,UChi_10) \
VMADDSUBRDUP(3,%r8,Chi_00,UChi_01) VMADDSUBRDUP(3,%r9,Chi_10,UChi_11) \
VMADDSUBRDUP(6,%r8,Chi_00,UChi_02) VMADDSUBRDUP(6,%r9,Chi_10,UChi_12) \
VMADDSUBRDUP(0,%r10,Chi_20,UChi_20) VMADDSUBRDUP(0,%r11,Chi_30,UChi_30) \
VMADDSUBRDUP(3,%r10,Chi_20,UChi_21) VMADDSUBRDUP(3,%r11,Chi_30,UChi_31) \
VMADDSUBRDUP(6,%r10,Chi_20,UChi_22) VMADDSUBRDUP(6,%r11,Chi_30,UChi_32) \
VSHUF(Chi_01,T0) VSHUF(Chi_11,T1) \
VSHUF(Chi_21,T2) VSHUF(Chi_31,T3) \
VMADDSUBIDUP(1,%r8,T0,UChi_00) VMADDSUBIDUP(1,%r9,T1,UChi_10) \
VMADDSUBIDUP(4,%r8,T0,UChi_01) VMADDSUBIDUP(4,%r9,T1,UChi_11) \
VMADDSUBIDUP(7,%r8,T0,UChi_02) VMADDSUBIDUP(7,%r9,T1,UChi_12) \
VMADDSUBIDUP(1,%r10,T2,UChi_20) VMADDSUBIDUP(1,%r11,T3,UChi_30) \
VMADDSUBIDUP(4,%r10,T2,UChi_21) VMADDSUBIDUP(4,%r11,T3,UChi_31) \
VMADDSUBIDUP(7,%r10,T2,UChi_22) VMADDSUBIDUP(7,%r11,T3,UChi_32) \
VMADDSUBRDUP(1,%r8,Chi_01,UChi_00) VMADDSUBRDUP(1,%r9,Chi_11,UChi_10) \
VMADDSUBRDUP(4,%r8,Chi_01,UChi_01) VMADDSUBRDUP(4,%r9,Chi_11,UChi_11) \
VMADDSUBRDUP(7,%r8,Chi_01,UChi_02) VMADDSUBRDUP(7,%r9,Chi_11,UChi_12) \
VMADDSUBRDUP(1,%r10,Chi_21,UChi_20) VMADDSUBRDUP(1,%r11,Chi_31,UChi_30) \
VMADDSUBRDUP(4,%r10,Chi_21,UChi_21) VMADDSUBRDUP(4,%r11,Chi_31,UChi_31) \
VMADDSUBRDUP(7,%r10,Chi_21,UChi_22) VMADDSUBRDUP(7,%r11,Chi_31,UChi_32) \
VSHUF(Chi_02,T0) VSHUF(Chi_12,T1) \
VSHUF(Chi_22,T2) VSHUF(Chi_32,T3) \
VMADDSUBIDUP(2,%r8,T0,UChi_00) VMADDSUBIDUP(2,%r9,T1,UChi_10) \
VMADDSUBIDUP(5,%r8,T0,UChi_01) VMADDSUBIDUP(5,%r9,T1,UChi_11) \
VMADDSUBIDUP(8,%r8,T0,UChi_02) VMADDSUBIDUP(8,%r9,T1,UChi_12) \
VMADDSUBIDUP(2,%r10,T2,UChi_20) VMADDSUBIDUP(2,%r11,T3,UChi_30) \
VMADDSUBIDUP(5,%r10,T2,UChi_21) VMADDSUBIDUP(5,%r11,T3,UChi_31) \
VMADDSUBIDUP(8,%r10,T2,UChi_22) VMADDSUBIDUP(8,%r11,T3,UChi_32) \
VMADDSUBRDUP(2,%r8,Chi_02,UChi_00) VMADDSUBRDUP(2,%r9,Chi_12,UChi_10) \
VMADDSUBRDUP(5,%r8,Chi_02,UChi_01) VMADDSUBRDUP(5,%r9,Chi_12,UChi_11) \
VMADDSUBRDUP(8,%r8,Chi_02,UChi_02) VMADDSUBRDUP(8,%r9,Chi_12,UChi_12) \
VMADDSUBRDUP(2,%r10,Chi_22,UChi_20) VMADDSUBRDUP(2,%r11,Chi_32,UChi_30) \
VMADDSUBRDUP(5,%r10,Chi_22,UChi_21) VMADDSUBRDUP(5,%r11,Chi_32,UChi_31) \
VMADDSUBRDUP(8,%r10,Chi_22,UChi_22) VMADDSUBRDUP(8,%r11,Chi_32,UChi_32) );
#define MULT_ADD_XYZTa(g0,g1) \
asm ( "movq %0, %%r8 \n\t" \
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9");\
__asm__ ( \
VSHUF(Chi_00,T0) \
VSHUF(Chi_10,T1) \
VMOVIDUP(0,%r8,Z0 ) \
VMOVIDUP(3,%r8,Z1 ) \
VMOVIDUP(6,%r8,Z2 ) \
VMADDSUB(Z0,T0,UChi_00) \
VMADDSUB(Z1,T0,UChi_01) \
VMADDSUB(Z2,T0,UChi_02) \
\
VMOVIDUP(0,%r9,Z0 ) \
VMOVIDUP(3,%r9,Z1 ) \
VMOVIDUP(6,%r9,Z2 ) \
VMADDSUB(Z0,T1,UChi_10) \
VMADDSUB(Z1,T1,UChi_11) \
VMADDSUB(Z2,T1,UChi_12) \
\
\
VMOVRDUP(0,%r8,Z3 ) \
VMOVRDUP(3,%r8,Z4 ) \
VMOVRDUP(6,%r8,Z5 ) \
VMADDSUB(Z3,Chi_00,UChi_00)/*rr * ir = ri rr*/ \
VMADDSUB(Z4,Chi_00,UChi_01) \
VMADDSUB(Z5,Chi_00,UChi_02) \
\
VMOVRDUP(0,%r9,Z3 ) \
VMOVRDUP(3,%r9,Z4 ) \
VMOVRDUP(6,%r9,Z5 ) \
VMADDSUB(Z3,Chi_10,UChi_10) \
VMADDSUB(Z4,Chi_10,UChi_11)\
VMADDSUB(Z5,Chi_10,UChi_12) \
\
\
VMOVIDUP(1,%r8,Z0 ) \
VMOVIDUP(4,%r8,Z1 ) \
VMOVIDUP(7,%r8,Z2 ) \
VSHUF(Chi_01,T0) \
VMADDSUB(Z0,T0,UChi_00) \
VMADDSUB(Z1,T0,UChi_01) \
VMADDSUB(Z2,T0,UChi_02) \
\
VMOVIDUP(1,%r9,Z0 ) \
VMOVIDUP(4,%r9,Z1 ) \
VMOVIDUP(7,%r9,Z2 ) \
VSHUF(Chi_11,T1) \
VMADDSUB(Z0,T1,UChi_10) \
VMADDSUB(Z1,T1,UChi_11) \
VMADDSUB(Z2,T1,UChi_12) \
\
VMOVRDUP(1,%r8,Z3 ) \
VMOVRDUP(4,%r8,Z4 ) \
VMOVRDUP(7,%r8,Z5 ) \
VMADDSUB(Z3,Chi_01,UChi_00) \
VMADDSUB(Z4,Chi_01,UChi_01) \
VMADDSUB(Z5,Chi_01,UChi_02) \
\
VMOVRDUP(1,%r9,Z3 ) \
VMOVRDUP(4,%r9,Z4 ) \
VMOVRDUP(7,%r9,Z5 ) \
VMADDSUB(Z3,Chi_11,UChi_10) \
VMADDSUB(Z4,Chi_11,UChi_11) \
VMADDSUB(Z5,Chi_11,UChi_12) \
\
VSHUF(Chi_02,T0) \
VSHUF(Chi_12,T1) \
VMOVIDUP(2,%r8,Z0 ) \
VMOVIDUP(5,%r8,Z1 ) \
VMOVIDUP(8,%r8,Z2 ) \
VMADDSUB(Z0,T0,UChi_00) \
VMADDSUB(Z1,T0,UChi_01) \
VMADDSUB(Z2,T0,UChi_02) \
VMOVIDUP(2,%r9,Z0 ) \
VMOVIDUP(5,%r9,Z1 ) \
VMOVIDUP(8,%r9,Z2 ) \
VMADDSUB(Z0,T1,UChi_10) \
VMADDSUB(Z1,T1,UChi_11) \
VMADDSUB(Z2,T1,UChi_12) \
/*55*/ \
VMOVRDUP(2,%r8,Z3 ) \
VMOVRDUP(5,%r8,Z4 ) \
VMOVRDUP(8,%r8,Z5 ) \
VMADDSUB(Z3,Chi_02,UChi_00) \
VMADDSUB(Z4,Chi_02,UChi_01) \
VMADDSUB(Z5,Chi_02,UChi_02) \
VMOVRDUP(2,%r9,Z3 ) \
VMOVRDUP(5,%r9,Z4 ) \
VMOVRDUP(8,%r9,Z5 ) \
VMADDSUB(Z3,Chi_12,UChi_10) \
VMADDSUB(Z4,Chi_12,UChi_11) \
VMADDSUB(Z5,Chi_12,UChi_12) \
/*61 insns*/ );
#define MULT_ADD_XYZT(g0,g1) \
asm ( "movq %0, %%r8 \n\t" \
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9");\
__asm__ ( \
VSHUFMEM(0,%r8,Z00) VSHUFMEM(0,%r9,Z10) \
VRDUP(Chi_00,T0) VIDUP(Chi_00,Chi_00) \
VRDUP(Chi_10,T1) VIDUP(Chi_10,Chi_10) \
VMUL(Z00,Chi_00,Z1) VMUL(Z10,Chi_10,Z2) \
VSHUFMEM(3,%r8,Z00) VSHUFMEM(3,%r9,Z10) \
VMUL(Z00,Chi_00,Z3) VMUL(Z10,Chi_10,Z4) \
VSHUFMEM(6,%r8,Z00) VSHUFMEM(6,%r9,Z10) \
VMUL(Z00,Chi_00,Z5) VMUL(Z10,Chi_10,Z6) \
VMADDMEM(0,%r8,T0,UChi_00) VMADDMEM(0,%r9,T1,UChi_10) \
VMADDMEM(3,%r8,T0,UChi_01) VMADDMEM(3,%r9,T1,UChi_11) \
VMADDMEM(6,%r8,T0,UChi_02) VMADDMEM(6,%r9,T1,UChi_12) \
VSHUFMEM(1,%r8,Z00) VSHUFMEM(1,%r9,Z10) \
VRDUP(Chi_01,T0) VIDUP(Chi_01,Chi_01) \
VRDUP(Chi_11,T1) VIDUP(Chi_11,Chi_11) \
VMADD(Z00,Chi_01,Z1) VMADD(Z10,Chi_11,Z2) \
VSHUFMEM(4,%r8,Z00) VSHUFMEM(4,%r9,Z10) \
VMADD(Z00,Chi_01,Z3) VMADD(Z10,Chi_11,Z4) \
VSHUFMEM(7,%r8,Z00) VSHUFMEM(7,%r9,Z10) \
VMADD(Z00,Chi_01,Z5) VMADD(Z10,Chi_11,Z6) \
VMADDMEM(1,%r8,T0,UChi_00) VMADDMEM(1,%r9,T1,UChi_10) \
VMADDMEM(4,%r8,T0,UChi_01) VMADDMEM(4,%r9,T1,UChi_11) \
VMADDMEM(7,%r8,T0,UChi_02) VMADDMEM(7,%r9,T1,UChi_12) \
VSHUFMEM(2,%r8,Z00) VSHUFMEM(2,%r9,Z10) \
VRDUP(Chi_02,T0) VIDUP(Chi_02,Chi_02) \
VRDUP(Chi_12,T1) VIDUP(Chi_12,Chi_12) \
VMADD(Z00,Chi_02,Z1) VMADD(Z10,Chi_12,Z2) \
VSHUFMEM(5,%r8,Z00) VSHUFMEM(5,%r9,Z10) \
VMADD(Z00,Chi_02,Z3) VMADD(Z10,Chi_12,Z4) \
VSHUFMEM(8,%r8,Z00) VSHUFMEM(8,%r9,Z10) \
VMADD(Z00,Chi_02,Z5) VMADD(Z10,Chi_12,Z6) \
VMADDSUBMEM(2,%r8,T0,Z1) VMADDSUBMEM(2,%r9,T1,Z2) \
VMADDSUBMEM(5,%r8,T0,Z3) VMADDSUBMEM(5,%r9,T1,Z4) \
VMADDSUBMEM(8,%r8,T0,Z5) VMADDSUBMEM(8,%r9,T1,Z6) \
VADD(Z1,UChi_00,UChi_00) VADD(Z2,UChi_10,UChi_10) \
VADD(Z3,UChi_01,UChi_01) VADD(Z4,UChi_11,UChi_11) \
VADD(Z5,UChi_02,UChi_02) VADD(Z6,UChi_12,UChi_12) );
#define MULT_XYZT(g0,g1) \
asm ( "movq %0, %%r8 \n\t" \
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9" ); \
__asm__ ( \
VSHUF(Chi_00,T0) \
VSHUF(Chi_10,T1) \
VMOVIDUP(0,%r8,Z0 ) \
VMOVIDUP(3,%r8,Z1 ) \
VMOVIDUP(6,%r8,Z2 ) \
/*6*/ \
VMUL(Z0,T0,UChi_00) \
VMUL(Z1,T0,UChi_01) \
VMUL(Z2,T0,UChi_02) \
VMOVIDUP(0,%r9,Z0 ) \
VMOVIDUP(3,%r9,Z1 ) \
VMOVIDUP(6,%r9,Z2 ) \
VMUL(Z0,T1,UChi_10) \
VMUL(Z1,T1,UChi_11) \
VMUL(Z2,T1,UChi_12) \
VMOVRDUP(0,%r8,Z3 ) \
VMOVRDUP(3,%r8,Z4 ) \
VMOVRDUP(6,%r8,Z5 ) \
/*18*/ \
VMADDSUB(Z3,Chi_00,UChi_00) \
VMADDSUB(Z4,Chi_00,UChi_01)\
VMADDSUB(Z5,Chi_00,UChi_02) \
VMOVRDUP(0,%r9,Z3 ) \
VMOVRDUP(3,%r9,Z4 ) \
VMOVRDUP(6,%r9,Z5 ) \
VMADDSUB(Z3,Chi_10,UChi_10) \
VMADDSUB(Z4,Chi_10,UChi_11)\
VMADDSUB(Z5,Chi_10,UChi_12) \
VMOVIDUP(1,%r8,Z0 ) \
VMOVIDUP(4,%r8,Z1 ) \
VMOVIDUP(7,%r8,Z2 ) \
/*28*/ \
VSHUF(Chi_01,T0) \
VMADDSUB(Z0,T0,UChi_00) \
VMADDSUB(Z1,T0,UChi_01) \
VMADDSUB(Z2,T0,UChi_02) \
VMOVIDUP(1,%r9,Z0 ) \
VMOVIDUP(4,%r9,Z1 ) \
VMOVIDUP(7,%r9,Z2 ) \
VSHUF(Chi_11,T1) \
VMADDSUB(Z0,T1,UChi_10) \
VMADDSUB(Z1,T1,UChi_11) \
VMADDSUB(Z2,T1,UChi_12) \
VMOVRDUP(1,%r8,Z3 ) \
VMOVRDUP(4,%r8,Z4 ) \
VMOVRDUP(7,%r8,Z5 ) \
/*38*/ \
VMADDSUB(Z3,Chi_01,UChi_00) \
VMADDSUB(Z4,Chi_01,UChi_01) \
VMADDSUB(Z5,Chi_01,UChi_02) \
VMOVRDUP(1,%r9,Z3 ) \
VMOVRDUP(4,%r9,Z4 ) \
VMOVRDUP(7,%r9,Z5 ) \
VMADDSUB(Z3,Chi_11,UChi_10) \
VMADDSUB(Z4,Chi_11,UChi_11) \
VMADDSUB(Z5,Chi_11,UChi_12) \
/*48*/ \
VSHUF(Chi_02,T0) \
VSHUF(Chi_12,T1) \
VMOVIDUP(2,%r8,Z0 ) \
VMOVIDUP(5,%r8,Z1 ) \
VMOVIDUP(8,%r8,Z2 ) \
VMADDSUB(Z0,T0,UChi_00) \
VMADDSUB(Z1,T0,UChi_01) \
VMADDSUB(Z2,T0,UChi_02) \
VMOVIDUP(2,%r9,Z0 ) \
VMOVIDUP(5,%r9,Z1 ) \
VMOVIDUP(8,%r9,Z2 ) \
VMADDSUB(Z0,T1,UChi_10) \
VMADDSUB(Z1,T1,UChi_11) \
VMADDSUB(Z2,T1,UChi_12) \
/*55*/ \
VMOVRDUP(2,%r8,Z3 ) \
VMOVRDUP(5,%r8,Z4 ) \
VMOVRDUP(8,%r8,Z5 ) \
VMADDSUB(Z3,Chi_02,UChi_00) \
VMADDSUB(Z4,Chi_02,UChi_01) \
VMADDSUB(Z5,Chi_02,UChi_02) \
VMOVRDUP(2,%r9,Z3 ) \
VMOVRDUP(5,%r9,Z4 ) \
VMOVRDUP(8,%r9,Z5 ) \
VMADDSUB(Z3,Chi_12,UChi_10) \
VMADDSUB(Z4,Chi_12,UChi_11) \
VMADDSUB(Z5,Chi_12,UChi_12) \
/*61 insns*/ );
#define MULT_XYZTa(g0,g1) \
asm ( "movq %0, %%r8 \n\t" \
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9" ); \
__asm__ ( \
VSHUFMEM(0,%r8,Z00) VSHUFMEM(0,%r9,Z10) \
VRDUP(Chi_00,T0) VIDUP(Chi_00,Chi_00) \
VRDUP(Chi_10,T1) VIDUP(Chi_10,Chi_10) \
VMUL(Z00,Chi_00,Z1) VMUL(Z10,Chi_10,Z2) \
VSHUFMEM(3,%r8,Z00) VSHUFMEM(3,%r9,Z10) \
VMUL(Z00,Chi_00,Z3) VMUL(Z10,Chi_10,Z4) \
VSHUFMEM(6,%r8,Z00) VSHUFMEM(6,%r9,Z10) \
VMUL(Z00,Chi_00,Z5) VMUL(Z10,Chi_10,Z6) \
VMULMEM(0,%r8,T0,UChi_00) VMULMEM(0,%r9,T1,UChi_10) \
VMULMEM(3,%r8,T0,UChi_01) VMULMEM(3,%r9,T1,UChi_11) \
VMULMEM(6,%r8,T0,UChi_02) VMULMEM(6,%r9,T1,UChi_12) \
VSHUFMEM(1,%r8,Z00) VSHUFMEM(1,%r9,Z10) \
VRDUP(Chi_01,T0) VIDUP(Chi_01,Chi_01) \
VRDUP(Chi_11,T1) VIDUP(Chi_11,Chi_11) \
VMADD(Z00,Chi_01,Z1) VMADD(Z10,Chi_11,Z2) \
VSHUFMEM(4,%r8,Z00) VSHUFMEM(4,%r9,Z10) \
VMADD(Z00,Chi_01,Z3) VMADD(Z10,Chi_11,Z4) \
VSHUFMEM(7,%r8,Z00) VSHUFMEM(7,%r9,Z10) \
VMADD(Z00,Chi_01,Z5) VMADD(Z10,Chi_11,Z6) \
VMADDMEM(1,%r8,T0,UChi_00) VMADDMEM(1,%r9,T1,UChi_10) \
VMADDMEM(4,%r8,T0,UChi_01) VMADDMEM(4,%r9,T1,UChi_11) \
VMADDMEM(7,%r8,T0,UChi_02) VMADDMEM(7,%r9,T1,UChi_12) \
VSHUFMEM(2,%r8,Z00) VSHUFMEM(2,%r9,Z10) \
VRDUP(Chi_02,T0) VIDUP(Chi_02,Chi_02) \
VRDUP(Chi_12,T1) VIDUP(Chi_12,Chi_12) \
VMADD(Z00,Chi_02,Z1) VMADD(Z10,Chi_12,Z2) \
VSHUFMEM(5,%r8,Z00) VSHUFMEM(5,%r9,Z10) \
VMADD(Z00,Chi_02,Z3) VMADD(Z10,Chi_12,Z4) \
VSHUFMEM(8,%r8,Z00) VSHUFMEM(8,%r9,Z10) \
VMADD(Z00,Chi_02,Z5) VMADD(Z10,Chi_12,Z6) \
VMADDSUBMEM(2,%r8,T0,Z1) VMADDSUBMEM(2,%r9,T1,Z2) \
VMADDSUBMEM(5,%r8,T0,Z3) VMADDSUBMEM(5,%r9,T1,Z4) \
VMADDSUBMEM(8,%r8,T0,Z5) VMADDSUBMEM(8,%r9,T1,Z6) \
VADD(Z1,UChi_00,UChi_00) VADD(Z2,UChi_10,UChi_10) \
VADD(Z3,UChi_01,UChi_01) VADD(Z4,UChi_11,UChi_11) \
VADD(Z5,UChi_02,UChi_02) VADD(Z6,UChi_12,UChi_12) );
#define LOAD_CHI(a0,a1,a2,a3) \
asm ( \
"movq %0, %%r8 \n\t" \
VLOAD(0,%%r8,pChi_00) \
VLOAD(1,%%r8,pChi_01) \
VLOAD(2,%%r8,pChi_02) \
: : "r" (a0) : "%r8" ); \
asm ( \
"movq %0, %%r8 \n\t" \
VLOAD(0,%%r8,pChi_10) \
VLOAD(1,%%r8,pChi_11) \
VLOAD(2,%%r8,pChi_12) \
: : "r" (a1) : "%r8" ); \
asm ( \
"movq %0, %%r8 \n\t" \
VLOAD(0,%%r8,pChi_20) \
VLOAD(1,%%r8,pChi_21) \
VLOAD(2,%%r8,pChi_22) \
: : "r" (a2) : "%r8" ); \
asm ( \
"movq %0, %%r8 \n\t" \
VLOAD(0,%%r8,pChi_30) \
VLOAD(1,%%r8,pChi_31) \
VLOAD(2,%%r8,pChi_32) \
: : "r" (a3) : "%r8" );
#define LOAD_CHIa(a0,a1) \
asm ( \
"movq %0, %%r8 \n\t" \
VLOAD(0,%%r8,pChi_00) \
VLOAD(1,%%r8,pChi_01) \
VLOAD(2,%%r8,pChi_02) \
: : "r" (a0) : "%r8" ); \
asm ( \
"movq %0, %%r8 \n\t" \
VLOAD(0,%%r8,pChi_10) \
VLOAD(1,%%r8,pChi_11) \
VLOAD(2,%%r8,pChi_12) \
: : "r" (a1) : "%r8" );
#define PF_CHI(a0)
#define PF_CHIa(a0) \
asm ( \
"movq %0, %%r8 \n\t" \
VPREFETCH1(0,%%r8) \
VPREFETCH1(1,%%r8) \
VPREFETCH1(2,%%r8) \
: : "r" (a0) : "%r8" ); \
#define PF_GAUGE_XYZT(a0)
#define PF_GAUGE_XYZTa(a0) \
asm ( \
"movq %0, %%r8 \n\t" \
VPREFETCH1(0,%%r8) \
VPREFETCH1(1,%%r8) \
VPREFETCH1(2,%%r8) \
VPREFETCH1(3,%%r8) \
VPREFETCH1(4,%%r8) \
VPREFETCH1(5,%%r8) \
VPREFETCH1(6,%%r8) \
VPREFETCH1(7,%%r8) \
VPREFETCH1(8,%%r8) \
: : "r" (a0) : "%r8" ); \
#define PF_GAUGE_LS(a0)
#define PF_GAUGE_LSa(a0) \
asm ( \
"movq %0, %%r8 \n\t" \
VPREFETCH1(0,%%r8) \
VPREFETCH1(1,%%r8) \
: : "r" (a0) : "%r8" ); \
#define REDUCE(out) \
asm ( \
VADD(UChi_00,UChi_10,UChi_00) \
VADD(UChi_01,UChi_11,UChi_01) \
VADD(UChi_02,UChi_12,UChi_02) \
VADD(UChi_30,UChi_20,UChi_30) \
VADD(UChi_31,UChi_21,UChi_31) \
VADD(UChi_32,UChi_22,UChi_32) \
VADD(UChi_00,UChi_30,UChi_00) \
VADD(UChi_01,UChi_31,UChi_01) \
VADD(UChi_02,UChi_32,UChi_02) ); \
asm ( \
VSTORE(0,%0,pUChi_00) \
VSTORE(1,%0,pUChi_01) \
VSTORE(2,%0,pUChi_02) \
: : "r" (out) : "memory" );
#define REDUCEa(out) \
asm ( \
VADD(UChi_00,UChi_10,UChi_00) \
VADD(UChi_01,UChi_11,UChi_01) \
VADD(UChi_02,UChi_12,UChi_02) ); \
asm ( \
VSTORE(0,%0,pUChi_00) \
VSTORE(1,%0,pUChi_01) \
VSTORE(2,%0,pUChi_02) \
: : "r" (out) : "memory" );
#define PERMUTE_DIR(dir) \
permute##dir(Chi_0,Chi_0);\
permute##dir(Chi_1,Chi_1);\
permute##dir(Chi_2,Chi_2);
namespace Grid {
namespace QCD {
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
{
assert(0);
};
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in._odata[o] ; } else { out =(uint64_t) &buf[o]; }
#define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? in_p : buf; out = (uint64_t) &ptr[o]; }
#define PREPARE_XYZT(X,Y,Z,T,skew,UU) \
PREPARE(X,Y,Z,T,skew,UU); \
PF_GAUGE_XYZT(gauge0); \
PF_GAUGE_XYZT(gauge1); \
PF_GAUGE_XYZT(gauge2); \
PF_GAUGE_XYZT(gauge3);
#define PREPARE_LS(X,Y,Z,T,skew,UU) \
PREPARE(X,Y,Z,T,skew,UU); \
PF_GAUGE_LS(gauge0); \
PF_GAUGE_LS(gauge1); \
PF_GAUGE_LS(gauge2); \
PF_GAUGE_LS(gauge3);
#define PREPARE(X,Y,Z,T,skew,UU) \
SE0=st.GetEntry(ptype,X+skew,sF); \
o0 = SE0->_offset; \
l0 = SE0->_is_local; \
p0 = SE0->_permute; \
CONDITIONAL_MOVE(l0,o0,addr0); \
PF_CHI(addr0); \
\
SE1=st.GetEntry(ptype,Y+skew,sF); \
o1 = SE1->_offset; \
l1 = SE1->_is_local; \
p1 = SE1->_permute; \
CONDITIONAL_MOVE(l1,o1,addr1); \
PF_CHI(addr1); \
\
SE2=st.GetEntry(ptype,Z+skew,sF); \
o2 = SE2->_offset; \
l2 = SE2->_is_local; \
p2 = SE2->_permute; \
CONDITIONAL_MOVE(l2,o2,addr2); \
PF_CHI(addr2); \
\
SE3=st.GetEntry(ptype,T+skew,sF); \
o3 = SE3->_offset; \
l3 = SE3->_is_local; \
p3 = SE3->_permute; \
CONDITIONAL_MOVE(l3,o3,addr3); \
PF_CHI(addr3); \
\
gauge0 =(uint64_t)&UU._odata[sU]( X ); \
gauge1 =(uint64_t)&UU._odata[sU]( Y ); \
gauge2 =(uint64_t)&UU._odata[sU]( Z ); \
gauge3 =(uint64_t)&UU._odata[sU]( T );
// This is the single precision 5th direction vectorised kernel
#include <simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
int p0,p1,p2,p3; // perm
int ptype;
StencilEntry *SE0;
StencilEntry *SE1;
StencilEntry *SE2;
StencilEntry *SE3;
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
// Xp, Yp, Zp, Tp
PREPARE(Xp,Yp,Zp,Tp,0,U);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_LS(gauge0,gauge1,gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,0,U);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
REDUCE(addr0);
}
#else
assert(0);
#endif
}
#include <simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
int p0,p1,p2,p3; // perm
int ptype;
StencilEntry *SE0;
StencilEntry *SE1;
StencilEntry *SE2;
StencilEntry *SE3;
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
// Xp, Yp, Zp, Tp
PREPARE(Xp,Yp,Zp,Tp,0,U);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_LS(gauge0,gauge1,gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,0,U);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
LOAD_CHI(addr0,addr1,addr2,addr3);
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
REDUCE(addr0);
}
#else
assert(0);
#endif
}
#define PERMUTE_DIR3 __asm__ ( \
VPERM3(Chi_00,Chi_00) \
VPERM3(Chi_01,Chi_01) \
VPERM3(Chi_02,Chi_02) );
#define PERMUTE_DIR2 __asm__ ( \
VPERM2(Chi_10,Chi_10) \
VPERM2(Chi_11,Chi_11) \
VPERM2(Chi_12,Chi_12) );
#define PERMUTE_DIR1 __asm__ ( \
VPERM1(Chi_00,Chi_00) \
VPERM1(Chi_01,Chi_01) \
VPERM1(Chi_02,Chi_02) );
#define PERMUTE_DIR0 __asm__ ( \
VPERM0(Chi_10,Chi_10) \
VPERM0(Chi_11,Chi_11) \
VPERM0(Chi_12,Chi_12) );
#define PERMUTE01 \
if ( p0 ) { PERMUTE_DIR3; }\
if ( p1 ) { PERMUTE_DIR2; }
#define PERMUTE23 \
if ( p2 ) { PERMUTE_DIR1; }\
if ( p3 ) { PERMUTE_DIR0; }
// This is the single precision 5th direction vectorised kernel
#include <simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
int p0,p1,p2,p3; // perm
int ptype;
StencilEntry *SE0;
StencilEntry *SE1;
StencilEntry *SE2;
StencilEntry *SE3;
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
// Xp, Yp, Zp, Tp
PREPARE(Xp,Yp,Zp,Tp,0,U);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,0,U);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_ADD_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_ADD_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_ADD_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
REDUCEa(addr0);
}
#else
assert(0);
#endif
}
#include <simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
uint64_t addr0,addr1,addr2,addr3;
const SiteSpinor *in_p; in_p = &in._odata[0];
int o0,o1,o2,o3; // offsets
int l0,l1,l2,l3; // local
int p0,p1,p2,p3; // perm
int ptype;
StencilEntry *SE0;
StencilEntry *SE1;
StencilEntry *SE2;
StencilEntry *SE3;
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
// Xp, Yp, Zp, Tp
PREPARE(Xp,Yp,Zp,Tp,0,U);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,0,U);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_ADD_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_ADD_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
LOAD_CHIa(addr0,addr1);
PERMUTE01;
MULT_ADD_XYZT(gauge0,gauge1);
LOAD_CHIa(addr2,addr3);
PERMUTE23;
MULT_ADD_XYZT(gauge2,gauge3);
addr0 = (uint64_t) &out._odata[sF];
REDUCEa(addr0);
}
#else
assert(0);
#endif
}
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
}}

View File

@ -0,0 +1,305 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/StaggerdKernelsHand.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#define REGISTER
#define LOAD_CHI(b) \
const SiteSpinor & ref (b[offset]); \
Chi_0=ref()()(0);\
Chi_1=ref()()(1);\
Chi_2=ref()()(2);
// To splat or not to splat depends on the implementation
#define MULT(A,UChi) \
auto & ref(U._odata[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
Impl::loadLinkElement(U_01,ref()(0,1)); \
Impl::loadLinkElement(U_11,ref()(1,1)); \
Impl::loadLinkElement(U_21,ref()(2,1)); \
Impl::loadLinkElement(U_02,ref()(0,2)); \
Impl::loadLinkElement(U_12,ref()(1,2)); \
Impl::loadLinkElement(U_22,ref()(2,2)); \
UChi ## _0 = U_00*Chi_0; \
UChi ## _1 = U_10*Chi_0;\
UChi ## _2 = U_20*Chi_0;\
UChi ## _0 += U_01*Chi_1;\
UChi ## _1 += U_11*Chi_1;\
UChi ## _2 += U_21*Chi_1;\
UChi ## _0 += U_02*Chi_2;\
UChi ## _1 += U_12*Chi_2;\
UChi ## _2 += U_22*Chi_2;
#define MULT_ADD(A,UChi) \
auto & ref(U._odata[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
Impl::loadLinkElement(U_01,ref()(0,1)); \
Impl::loadLinkElement(U_11,ref()(1,1)); \
Impl::loadLinkElement(U_21,ref()(2,1)); \
Impl::loadLinkElement(U_02,ref()(0,2)); \
Impl::loadLinkElement(U_12,ref()(1,2)); \
Impl::loadLinkElement(U_22,ref()(2,2)); \
UChi ## _0 += U_00*Chi_0; \
UChi ## _1 += U_10*Chi_0;\
UChi ## _2 += U_20*Chi_0;\
UChi ## _0 += U_01*Chi_1;\
UChi ## _1 += U_11*Chi_1;\
UChi ## _2 += U_21*Chi_1;\
UChi ## _0 += U_02*Chi_2;\
UChi ## _1 += U_12*Chi_2;\
UChi ## _2 += U_22*Chi_2;
#define PERMUTE_DIR(dir) \
permute##dir(Chi_0,Chi_0);\
permute##dir(Chi_1,Chi_1);\
permute##dir(Chi_2,Chi_2);
namespace Grid {
namespace QCD {
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out, int dag)
{
SiteSpinor naik;
SiteSpinor naive;
int oneLink =0;
int threeLink=1;
int skew(0);
Real scale(1.0);
if(dag) scale = -1.0;
for(int s=0;s<LLs;s++){
int sF=s+LLs*sU;
DhopSiteDepthHand(st,lo,U,buf,sF,sU,in,naive,oneLink);
DhopSiteDepthHand(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
out._odata[sF] =scale*(naive+naik);
}
}
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteSpinor *buf, int sF,
int sU, const FermionField &in, SiteSpinor &out,int threeLink)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
REGISTER Simd even_0; // 12 regs on knc
REGISTER Simd even_1;
REGISTER Simd even_2;
REGISTER Simd odd_0; // 12 regs on knc
REGISTER Simd odd_1;
REGISTER Simd odd_2;
REGISTER Simd Chi_0; // two spinor; 6 regs
REGISTER Simd Chi_1;
REGISTER Simd Chi_2;
REGISTER Simd U_00; // two rows of U matrix
REGISTER Simd U_10;
REGISTER Simd U_20;
REGISTER Simd U_01;
REGISTER Simd U_11;
REGISTER Simd U_21; // 2 reg left.
REGISTER Simd U_02;
REGISTER Simd U_12;
REGISTER Simd U_22;
int skew = 0;
if (threeLink) skew=8;
int offset,local,perm, ptype;
StencilEntry *SE;
// Xp
SE=st.GetEntry(ptype,Xp+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT(Xp,even);
}
// Yp
SE=st.GetEntry(ptype,Yp+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT(Yp,odd);
}
// Zp
SE=st.GetEntry(ptype,Zp+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT_ADD(Zp,even);
}
// Tp
SE=st.GetEntry(ptype,Tp+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT_ADD(Tp,odd);
}
// Xm
SE=st.GetEntry(ptype,Xm+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT_ADD(Xm,even);
}
// Ym
SE=st.GetEntry(ptype,Ym+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT_ADD(Ym,odd);
}
// Zm
SE=st.GetEntry(ptype,Zm+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT_ADD(Zm,even);
}
// Tm
SE=st.GetEntry(ptype,Tm+skew,sF);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHI(in._odata);
if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI(buf);
}
{
MULT_ADD(Tm,odd);
}
vstream(out()()(0),even_0+odd_0);
vstream(out()()(1),even_1+odd_1);
vstream(out()()(2),even_2+odd_2);
}
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
}}

View File

@ -34,10 +34,9 @@ directory
namespace Grid {
namespace QCD {
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2,
3});
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1,
-1, -1});
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
int WilsonFermionStatic::HandOptDslash;
/////////////////////////////////
@ -224,8 +223,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
////////////////////////
PARALLEL_FOR_LOOP
for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu,
gamma);
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma);
}
//////////////////////////////////////////////////
@ -335,8 +333,7 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out,
dirdisp, gamma);
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
}
};
@ -353,14 +350,12 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
if (dag == DaggerYes) {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
out);
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
} else {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
out);
Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
}
}
};

View File

@ -63,71 +63,55 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
LebesgueEvenOdd(_FourDimRedBlackGrid),
_tmp(&FiveDimRedBlackGrid)
{
// some assertions
assert(FiveDimGrid._ndimension==5);
assert(FourDimGrid._ndimension==4);
assert(FourDimRedBlackGrid._ndimension==4);
assert(FiveDimRedBlackGrid._ndimension==5);
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
// extent of fifth dim and not spread out
Ls=FiveDimGrid._fdimensions[0];
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimRedBlackGrid._processors[0] ==1);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<4;d++){
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
}
if (Impl::LsVectorised) {
int nsimd = Simd::Nsimd();
// some assertions
assert(FiveDimGrid._ndimension==5);
assert(FiveDimRedBlackGrid._ndimension==5);
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
assert(FourDimGrid._ndimension==4);
// Dimension zero of the five-d is the Ls direction
Ls=FiveDimGrid._fdimensions[0];
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimGrid._simd_layout[0] ==nsimd);
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
assert(FiveDimRedBlackGrid._processors[0] ==1);
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<4;d++){
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
}
} else {
// some assertions
assert(FiveDimGrid._ndimension==5);
assert(FourDimGrid._ndimension==4);
assert(FiveDimRedBlackGrid._ndimension==5);
assert(FourDimRedBlackGrid._ndimension==4);
assert(FiveDimRedBlackGrid._checker_dim==1);
// Dimension zero of the five-d is the Ls direction
Ls=FiveDimGrid._fdimensions[0];
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
assert(FiveDimRedBlackGrid._processors[0] ==1);
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimGrid._simd_layout[0] ==1);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<4;d++){
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
}
}
// Allocate the required comms buffer

View File

@ -63,8 +63,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
Phi(Op.FermionGrid()){};
//////////////////////////////////////////////////////////////////////////////////////
// Push the gauge field in to the dops. Assume any BC's and smearing already
// applied
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
//////////////////////////////////////////////////////////////////////////////////////
virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) {
// P(phi) = e^{- phi^dag (MdagM)^-1 phi}
@ -107,8 +106,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
MdagMOp.Op(X, Y);
RealD action = norm2(Y);
std::cout << GridLogMessage << "Pseudofermion action " << action
<< std::endl;
std::cout << GridLogMessage << "Pseudofermion action " << action << std::endl;
return action;
};
@ -119,6 +117,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
//
// = - Ydag dM X - Xdag dMdag Y
//
//
//////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
FermOp.ImportGauge(U);
@ -133,8 +132,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi
MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi
// Our conventions really make this UdSdU; We do not differentiate wrt Udag
// here.
// Our conventions really make this UdSdU; We do not differentiate wrt Udag here.
// So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt.
FermOp.MDeriv(tmp, Y, X, DaggerNo);

View File

@ -91,7 +91,7 @@ int main (int argc, char ** argv)
GridParallelRNG sRNG4(sUGrid); sRNG4.SeedFixedIntegers(seeds4);
GridParallelRNG sRNG5(sFGrid); sRNG5.SeedFixedIntegers(seeds5);
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
RealD mass=0.1;
RealD M5 =1.8;

View File

@ -64,7 +64,7 @@ int main (int argc, char ** argv)
LatticeFermion ref(FGrid); ref=zero;
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){

View File

@ -70,7 +70,7 @@ int main (int argc, char ** argv)
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
RealD mass=0.1;

View File

@ -72,7 +72,7 @@ int main (int argc, char ** argv)
LatticeFermion ref(FGrid); ref=zero;
LatticeFermion tmp(FGrid); tmp=zero;
LatticeFermion err(FGrid); tmp=zero;
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
// Only one non-zero (y)

View File

@ -81,7 +81,7 @@ int main (int argc, char ** argv)
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
// Only one non-zero (y)

View File

@ -61,7 +61,7 @@ int main (int argc, char ** argv)
FermionField ref(&Grid); ref=zero;
FermionField tmp(&Grid); tmp=zero;
FermionField err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;

View File

@ -0,0 +1,291 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedRandomDevice();
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
typename ImprovedStaggeredFermionR::ImplParams params;
FermionField src (&Grid); random(pRNG,src);
FermionField result(&Grid); result=zero;
FermionField ref(&Grid); ref=zero;
FermionField tmp(&Grid); tmp=zero;
FermionField err(&Grid); tmp=zero;
FermionField phi (&Grid); random(pRNG,phi);
FermionField chi (&Grid); random(pRNG,chi);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
// Only one non-zero (y)
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
/* Debug force unit
U[mu] = 1.0;
PokeIndex<LorentzIndex>(Umu,U[mu],mu);
*/
}
ref = zero;
RealD mass=0.1;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
{ // Simple improved staggered implementation
ref = zero;
RealD c1tad = 0.5*c1/u0;
RealD c2tad = 0.5*c2/u0/u0/u0;
Lattice<iScalar<vInteger> > coor(&Grid);
Lattice<iScalar<vInteger> > x(&Grid); LatticeCoordinate(x,0);
Lattice<iScalar<vInteger> > y(&Grid); LatticeCoordinate(y,1);
Lattice<iScalar<vInteger> > z(&Grid); LatticeCoordinate(z,2);
Lattice<iScalar<vInteger> > t(&Grid); LatticeCoordinate(t,3);
Lattice<iScalar<vInteger> > lin_z(&Grid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(&Grid); lin_t=x+y+z;
for(int mu=0;mu<Nd;mu++){
// Staggered Phase.
ComplexField phases(&Grid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
tmp = PeriodicBC::CovShiftForward(U[mu],mu,src);
ref = ref +c1tad*tmp*phases; // Forward 1 hop
tmp = PeriodicBC::CovShiftForward(U[mu],mu,tmp); // 2 hop
tmp = PeriodicBC::CovShiftForward(U[mu],mu,tmp); // 3 hop
ref = ref +c2tad*tmp*phases; // Forward 3 hop
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,src);
ref = ref -c1tad*tmp*phases; // Forward 3 hop
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,tmp);
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,tmp);
ref = ref -c2tad*tmp*phases; // Forward 3 hop
}
// ref = ref + mass * src;
}
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Ds.Dhop(src,result,0);
}
double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
FermionField src_e (&RBGrid);
FermionField src_o (&RBGrid);
FermionField r_e (&RBGrid);
FermionField r_o (&RBGrid);
FermionField r_eo (&Grid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
Ds.Meooe(src_e,r_o); std::cout<<GridLogMessage<<"Applied Meo"<<std::endl;
Ds.Meooe(src_o,r_e); std::cout<<GridLogMessage<<"Applied Moe"<<std::endl;
Ds.Dhop (src,ref,DaggerNo);
setCheckerboard(r_eo,r_o);
setCheckerboard(r_eo,r_e);
err= ref - r_eo;
std::cout<<GridLogMessage << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
std::cout<<GridLogMessage<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
FermionField chi_e (&RBGrid);
FermionField chi_o (&RBGrid);
FermionField dchi_e (&RBGrid);
FermionField dchi_o (&RBGrid);
FermionField phi_e (&RBGrid);
FermionField phi_o (&RBGrid);
FermionField dphi_e (&RBGrid);
FermionField dphi_o (&RBGrid);
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
Ds.Meooe(chi_e,dchi_o);
Ds.Meooe(chi_o,dchi_e);
Ds.MeooeDag(phi_e,dphi_o);
Ds.MeooeDag(phi_o,dphi_e);
ComplexD pDce = innerProduct(phi_e,dchi_e);
ComplexD pDco = innerProduct(phi_o,dchi_o);
ComplexD cDpe = innerProduct(chi_e,dphi_e);
ComplexD cDpo = innerProduct(chi_o,dphi_o);
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test MeeInv Mee = 1 "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
Ds.Mooee(chi_e,src_e);
Ds.MooeeInv(src_e,phi_e);
Ds.Mooee(chi_o,src_o);
Ds.MooeeInv(src_o,phi_o);
setCheckerboard(phi,phi_e);
setCheckerboard(phi,phi_o);
err = phi-chi;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
Ds.MooeeDag(chi_e,src_e);
Ds.MooeeInvDag(src_e,phi_e);
Ds.MooeeDag(chi_o,src_o);
Ds.MooeeInvDag(src_o,phi_o);
setCheckerboard(phi,phi_e);
setCheckerboard(phi,phi_o);
err = phi-chi;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test MpcDagMpc is Hermitian "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
random(pRNG,phi);
random(pRNG,chi);
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
SchurDiagMooeeOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
pDce = innerProduct(phi_e,dchi_e);
pDco = innerProduct(phi_o,dchi_o);
cDpe = innerProduct(chi_e,dphi_e);
cDpo = innerProduct(chi_o,dphi_o);
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,309 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
const int Ls=16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG4(UGrid);
GridParallelRNG pRNG5(FGrid);
pRNG4.SeedFixedIntegers(seeds);
pRNG5.SeedFixedIntegers(seeds);
typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
typename ImprovedStaggeredFermion5DR::ImplParams params;
FermionField src (FGrid);
random(pRNG5,src);
FermionField result(FGrid); result=zero;
FermionField ref(FGrid); ref=zero;
FermionField tmp(FGrid); tmp=zero;
FermionField err(FGrid); tmp=zero;
FermionField phi (FGrid); random(pRNG5,phi);
FermionField chi (FGrid); random(pRNG5,chi);
LatticeGaugeField Umu(UGrid); SU3::ColdConfiguration(pRNG4,Umu);
LatticeGaugeField Umua(UGrid); Umua=Umu;
double volume=Ls;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
////////////////////////////////////
// Naive implementation needs to
// replicate across fifth dimension
////////////////////////////////////
LatticeGaugeField Umu5d(FGrid);
for(int ss=0;ss<Umu._grid->oSites();ss++){
for(int s=0;s<Ls;s++){
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
}
}
std::vector<LatticeColourMatrix> U(4,FGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
}
RealD mass=0.1;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
{ // Simple improved staggered implementation
ref = zero;
RealD c1tad = 0.5*c1/u0;
RealD c2tad = 0.5*c2/u0/u0/u0;
Lattice<iScalar<vInteger> > coor(FGrid);
Lattice<iScalar<vInteger> > x(FGrid); LatticeCoordinate(x,1); // s innermost
Lattice<iScalar<vInteger> > y(FGrid); LatticeCoordinate(y,2);
Lattice<iScalar<vInteger> > z(FGrid); LatticeCoordinate(z,3);
Lattice<iScalar<vInteger> > t(FGrid); LatticeCoordinate(t,4);
Lattice<iScalar<vInteger> > lin_z(FGrid); lin_z=x+y;
Lattice<iScalar<vInteger> > lin_t(FGrid); lin_t=x+y+z;
for(int mu=0;mu<Nd;mu++){
// Staggered Phase.
ComplexField phases(FGrid); phases=1.0;
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
tmp = PeriodicBC::CovShiftForward(U[mu],mu+1,src);
ref = ref +c1tad*tmp*phases; // Forward 1 hop
tmp = PeriodicBC::CovShiftForward(U[mu],mu+1,tmp); // 2 hop
tmp = PeriodicBC::CovShiftForward(U[mu],mu+1,tmp); // 3 hop
ref = ref +c2tad*tmp*phases; // Forward 3 hop
tmp = PeriodicBC::CovShiftBackward(U[mu],mu+1,src);
ref = ref -c1tad*tmp*phases; // Forward 3 hop
tmp = PeriodicBC::CovShiftBackward(U[mu],mu+1,tmp);
tmp = PeriodicBC::CovShiftBackward(U[mu],mu+1,tmp);
ref = ref -c2tad*tmp*phases; // Forward 3 hop
}
}
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0,params);
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
int ncall=100000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Ds.Dhop(src,result,0);
}
double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
// std::cout<<GridLogMessage << "result"<< result <<std::endl;
// std::cout<<GridLogMessage << "ref "<< ref <<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
FermionField src_e (FrbGrid);
FermionField src_o (FrbGrid);
FermionField r_e (FrbGrid);
FermionField r_o (FrbGrid);
FermionField r_eo (FGrid);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
Ds.Meooe(src_e,r_o); std::cout<<GridLogMessage<<"Applied Meo"<<std::endl;
Ds.Meooe(src_o,r_e); std::cout<<GridLogMessage<<"Applied Moe"<<std::endl;
Ds.Dhop (src,ref,DaggerNo);
setCheckerboard(r_eo,r_o);
setCheckerboard(r_eo,r_e);
err= ref - r_eo;
std::cout<<GridLogMessage << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
std::cout<<GridLogMessage<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
FermionField chi_e (FrbGrid);
FermionField chi_o (FrbGrid);
FermionField dchi_e (FrbGrid);
FermionField dchi_o (FrbGrid);
FermionField phi_e (FrbGrid);
FermionField phi_o (FrbGrid);
FermionField dphi_e (FrbGrid);
FermionField dphi_o (FrbGrid);
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
Ds.Meooe(chi_e,dchi_o);
Ds.Meooe(chi_o,dchi_e);
Ds.MeooeDag(phi_e,dphi_o);
Ds.MeooeDag(phi_o,dphi_e);
ComplexD pDce = innerProduct(phi_e,dchi_e);
ComplexD pDco = innerProduct(phi_o,dchi_o);
ComplexD cDpe = innerProduct(chi_e,dphi_e);
ComplexD cDpo = innerProduct(chi_o,dphi_o);
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test MeeInv Mee = 1 "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
Ds.Mooee(chi_e,src_e);
Ds.MooeeInv(src_e,phi_e);
Ds.Mooee(chi_o,src_o);
Ds.MooeeInv(src_o,phi_o);
setCheckerboard(phi,phi_e);
setCheckerboard(phi,phi_o);
err = phi-chi;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
Ds.MooeeDag(chi_e,src_e);
Ds.MooeeInvDag(src_e,phi_e);
Ds.MooeeDag(chi_o,src_o);
Ds.MooeeInvDag(src_o,phi_o);
setCheckerboard(phi,phi_e);
setCheckerboard(phi,phi_o);
err = phi-chi;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Test MpcDagMpc is Hermitian "<<std::endl;
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
random(pRNG5,phi);
random(pRNG5,chi);
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
SchurDiagMooeeOperator<ImprovedStaggeredFermion5DR,FermionField> HermOpEO(Ds);
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
pDce = innerProduct(phi_e,dchi_e);
pDco = innerProduct(phi_o,dchi_o);
cDpe = innerProduct(chi_e,dphi_e);
cDpo = innerProduct(chi_o,dphi_o);
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,184 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
const int Ls=16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG4(UGrid);
GridParallelRNG pRNG5(FGrid);
pRNG4.SeedFixedIntegers(seeds);
pRNG5.SeedFixedIntegers(seeds);
typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
typename ImprovedStaggeredFermion5DR::ImplParams params;
FermionField src (FGrid);
random(pRNG5,src);
/*
std::vector<int> site({0,1,2,0,0});
ColourVector cv = zero;
cv()()(0)=1.0;
src = zero;
pokeSite(cv,src,site);
*/
FermionField result(FGrid); result=zero;
FermionField tmp(FGrid); tmp=zero;
FermionField err(FGrid); tmp=zero;
FermionField phi (FGrid); random(pRNG5,phi);
FermionField chi (FGrid); random(pRNG5,chi);
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(pRNG4,Umu);
/*
for(int mu=1;mu<4;mu++){
auto tmp = PeekIndex<LorentzIndex>(Umu,mu);
tmp = zero;
PokeIndex<LorentzIndex>(Umu,tmp,mu);
}
*/
double volume=Ls;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
RealD mass=0.1;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0,params);
ImprovedStaggeredFermionVec5dR sDs(Umu,Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,c1,c2,u0,params);
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
int ncall=1000;
int ncall1=1000;
double t0(0),t1(0);
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Calling staggered operator"<<std::endl;
t0=usecond();
for(int i=0;i<ncall1;i++){
Ds.Dhop(src,result,0);
}
t1=usecond();
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Calling vectorised staggered operator"<<std::endl;
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
t0=usecond();
for(int i=0;i<ncall1;i++){
Ds.Dhop(src,tmp,0);
}
t1=usecond();
std::cout<<GridLogMessage << "Called Ds ASM"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(tmp)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = tmp-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
FermionField ssrc (sFGrid); localConvert(src,ssrc);
FermionField sresult(sFGrid); sresult=zero;
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll;
t0=usecond();
for(int i=0;i<ncall1;i++){
sDs.Dhop(ssrc,sresult,0);
}
t1=usecond();
localConvert(sresult,tmp);
std::cout<<GridLogMessage << "Called sDs unroll"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(sresult)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
err = tmp-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
int extra=1;
t0=usecond();
for(int i=0;i<ncall1*extra;i++){
sDs.Dhop(ssrc,sresult,0);
}
t1=usecond();
localConvert(sresult,tmp);
std::cout<<GridLogMessage << "Called sDs asm"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(sresult)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)*extra<<std::endl;
err = tmp-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}

View File

@ -71,7 +71,7 @@ int main (int argc, char ** argv)
LatticeFermion ref(&Grid); ref=zero;
LatticeFermion tmp(&Grid); tmp=zero;
LatticeFermion err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;

View File

@ -70,7 +70,7 @@ int main (int argc, char ** argv)
LatticeFermion ref(&Grid); ref=zero;
LatticeFermion tmp(&Grid); tmp=zero;
LatticeFermion err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;

View File

@ -77,7 +77,7 @@ int main (int argc, char ** argv)
LatticeFermion ref(FGrid); ref=zero;
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
#if 0
std::vector<LatticeColourMatrix> U(4,UGrid);

View File

@ -70,7 +70,7 @@ int main (int argc, char ** argv)
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
RealD mass=0.1;

View File

@ -187,7 +187,7 @@ int main(int argc,char **argv)
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
random(RNG5,src);
#if 1
random(RNG4,Umu);
SU3::HotConfiguration(RNG4,Umu);
#else
int mmu=2;
std::vector<LatticeColourMatrix> U(4,UGrid);

View File

@ -61,7 +61,7 @@ int main (int argc, char ** argv)
LatticeFermion src(FGrid); random(RNG5,src);
LatticeFermion result(FGrid); result=zero;
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){

View File

@ -94,7 +94,7 @@ int main (int argc, char ** argv)
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
RealD mass=0.1;

View File

@ -61,7 +61,7 @@ int main (int argc, char ** argv)
LatticeFermion src(FGrid); random(RNG5,src);
LatticeFermion result(FGrid); result=zero;
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){

View File

@ -61,7 +61,7 @@ int main (int argc, char ** argv)
LatticeFermion src(FGrid); random(RNG5,src);
LatticeFermion result(FGrid); result=zero;
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){

View File

@ -65,7 +65,7 @@ int main (int argc, char ** argv)
LatticeFermion src(FGrid); random(RNG5,src);
LatticeFermion result(FGrid); result=zero;
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);

View File

@ -60,7 +60,7 @@ int main (int argc, char ** argv)
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);

View File

@ -57,7 +57,7 @@ int main (int argc, char ** argv)
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;

View File

@ -60,7 +60,7 @@ int main (int argc, char ** argv)
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
double volume=1;
for(int mu=0;mu<Nd;mu++){

View File

@ -60,7 +60,7 @@ int main (int argc, char ** argv)
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);