Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-09 23:45:36 +00:00)

Merge branch 'feature/staggering' into develop
Commit 18bde08d1b
benchmarks/Benchmark_staggered.cc (new file)
@ -0,0 +1,134 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./benchmarks/Benchmark_staggered.cc

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory

*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  std::vector<int> latt_size   = GridDefaultLatt();
  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
  std::vector<int> mpi_layout  = GridDefaultMpi();
  GridCartesian           Grid(latt_size,simd_layout,mpi_layout);
  GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);

  int threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
  std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
  std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
  std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;

  std::vector<int> seeds({1,2,3,4});
  GridParallelRNG pRNG(&Grid);
  pRNG.SeedFixedIntegers(seeds);
  //  pRNG.SeedRandomDevice();

  typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
  typename ImprovedStaggeredFermionR::ImplParams params;

  FermionField src   (&Grid); random(pRNG,src);
  FermionField result(&Grid); result=zero;
  FermionField    ref(&Grid);    ref=zero;
  FermionField    tmp(&Grid);    tmp=zero;
  FermionField    err(&Grid);    err=zero;
  LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
  std::vector<LatticeColourMatrix> U(4,&Grid);

  double volume=1;
  for(int mu=0;mu<Nd;mu++){
    volume=volume*latt_size[mu];
  }

  // Only one non-zero (y)
#if 0
  Umu=zero;
  Complex cone(1.0,0.0);
  for(int nn=0;nn<Nd;nn++){
    random(pRNG,U[nn]);
    if(1) {
      if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
      //      else { U[nn]= cone; std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
      else       { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
    }
    PokeIndex<LorentzIndex>(Umu,U[nn],nn);
  }
#endif

  for(int mu=0;mu<Nd;mu++){
    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
  }
  ref = zero;
  /*
  { // Naive wilson implementation
    ref = zero;
    for(int mu=0;mu<Nd;mu++){
      //    ref =  src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
      tmp = U[mu]*Cshift(src,mu,1);
      for(int i=0;i<ref._odata.size();i++){
        ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i];
      }

      tmp =adj(U[mu])*src;
      tmp =Cshift(tmp,mu,-1);
      for(int i=0;i<ref._odata.size();i++){
        ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i];
      }
    }
  }
  ref = -0.5*ref;
  */

  RealD mass=0.1;
  RealD c1=9.0/8.0;
  RealD c2=-1.0/24.0;
  RealD u0=1.0;
  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);

  std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
  int ncall=1000;
  double t0=usecond();
  for(int i=0;i<ncall;i++){
    Ds.Dhop(src,result,0);
  }
  double t1=usecond();
  double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // = 16*66 + 90 = 1146 flops per site

  std::cout<<GridLogMessage << "Called Ds"<<std::endl;
  std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
  std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
  std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
  err = ref-result;
  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;

  Grid_finalize();
}
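A note on the flop count used above: the benchmark charges 16 stencil points times 3*(6+8+8) flops for the SU(3) matrix-vector work, plus 15*3*2 flops for accumulating the partial results, i.e. 1146 flops per site per Dhop call, and divides by the elapsed time in microseconds to report Mflop/s. The standalone sketch below (plain C++, no Grid dependencies; the lattice volume and timing are made-up illustration values) simply restates that arithmetic so the numbers are easy to check.

#include <cstdio>

int main() {
  // Per-site flop model from Benchmark_staggered.cc:
  //   16 * (3*(6+8+8)) = 1056   SU(3) matrix * vector work over the 16 stencil points
  //   15 * 3 * 2       =   90   accumulating 16 partial results (15 complex adds per colour)
  //   total            = 1146   flops per site per Dhop application
  const double flops_per_site = 16*(3*(6+8+8)) + 15*3*2;
  const double volume = 16.0*16.0*16.0*16.0;   // example 16^4 lattice (illustrative)
  const int    ncall  = 1000;                  // matches the benchmark loop
  const double elapsed_usec = 1.0e6;           // pretend the ncall loop took one second
  std::printf("flops/site = %.0f, Mflop/s = %.1f\n",
              flops_per_site, flops_per_site*volume*ncall/elapsed_usec);
  return 0;
}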
@ -324,12 +324,15 @@ void Grid_init(int *argc,char ***argv)
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
    QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll;
    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll;
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
    QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm;
    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
    QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric;
    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptGeneric;
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
    LebesgueOrder::UseLebesgueOrder=1;

@ -413,7 +416,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
#endif
#endif
  BACKTRACE();
  if ( si->si_signo != SIGTRAP ) exit(0);
  return;
};
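The three --dslash-* options above now set the staggered kernel selector alongside the Wilson one. For completeness, the fragment below shows the equivalent programmatic choice; it is a sketch that assumes the Grid headers are included and Grid_init has been called, and it simply assigns the same statics that the command-line flags set.

  // Equivalent to passing --dslash-generic on the command line:
  QCD::WilsonKernelsStatic::Opt    = QCD::WilsonKernelsStatic::OptGeneric;
  QCD::StaggeredKernelsStatic::Opt = QCD::StaggeredKernelsStatic::OptGeneric;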
@ -87,6 +87,7 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
{
  assert(0);
}

void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  assert(0);

@ -97,7 +98,7 @@ void CartesianCommunicator::Barrier(void){}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
int  CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor; }
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
  source =0;
@ -45,6 +45,10 @@ namespace QCD {
    WilsonImplParams() : overlapCommsCompute(false) {};
  };

  struct StaggeredImplParams {
    StaggeredImplParams() {};
  };

  struct OneFlavourRationalParams {
    RealD  lo;
    RealD  hi;
@ -53,6 +53,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h>
#include <Grid/qcd/action/fermion/FermionOperator.h>
#include <Grid/qcd/action/fermion/WilsonKernels.h>    //used by all wilson type fermions
#include <Grid/qcd/action/fermion/StaggeredKernels.h> //used by all staggered type fermions

////////////////////////////////////////////
// Gauge Actions

@ -108,6 +109,14 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
////////////////////////////////////////////////////////////////////////////////////////////////////

#define FermOpStaggeredTemplateInstantiate(A) \
  template class A<StaggeredImplF>; \
  template class A<StaggeredImplD>;

#define FermOpStaggeredVec5dTemplateInstantiate(A) \
  template class A<StaggeredVec5dImplF>; \
  template class A<StaggeredVec5dImplD>;

#define FermOp4dVecTemplateInstantiate(A) \
  template class A<WilsonImplF>; \
  template class A<WilsonImplD>; \

@ -147,6 +156,9 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction

//#include <Grid/qcd/action/fermion/CloverFermion.h>

#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion.h>
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>

#include <Grid/qcd/action/fermion/CayleyFermion5D.h>    // Cayley types
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallFermion.h>

@ -268,6 +280,17 @@ typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;

typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;

typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;

typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD;

}}
@ -235,11 +235,13 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
    typedef Lattice<SiteSpinor>      FermionField;
    typedef Lattice<SitePropagator>  PropagatorField;

    /////////////////////////////////////////////////
    // Make the doubled gauge field a *scalar*
    /////////////////////////////////////////////////
    typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
    typedef iImplGaugeField<typename Simd::scalar_type>        SiteScalarGaugeField;  // scalar
    typedef iImplGaugeLink<typename Simd::scalar_type>         SiteScalarGaugeLink;   // scalar

    typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;

    typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;

@ -271,11 +273,11 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres

  inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
  {
    SiteScalarGaugeField  ScalarUmu;
    SiteDoubledGaugeField ScalarUds;

    GaugeLinkField U   (Umu._grid);
    GaugeField     Uadj(Umu._grid);
    for (int mu = 0; mu < Nd; mu++) {
      U = PeekIndex<LorentzIndex>(Umu, mu);
      U = adj(Cshift(U, mu, -1));

@ -356,7 +358,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
                        StencilImpl &St) {

    typedef SiteHalfSpinor vobj;
    typedef typename SiteHalfSpinor::scalar_object sobj;

    vobj vtmp;
    sobj stmp;
@ -512,6 +514,316 @@ PARALLEL_FOR_LOOP

 };

  /////////////////////////////////////////////////////////////////////////////
  // Single flavour one component spinors with colour index
  /////////////////////////////////////////////////////////////////////////////
  template <class S, class Representation = FundamentalRepresentation >
  class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {

  public:

    typedef RealD  _Coeff_t ;
    static const int Dimension = Representation::Dimension;
    typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;

    //Necessary?
    constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}

    const bool LsVectorised=false;
    typedef _Coeff_t Coeff_t;

    INHERIT_GIMPL_TYPES(Gimpl);

    template <typename vtype> using iImplScalar            = iScalar<iScalar<iScalar<vtype> > >;
    template <typename vtype> using iImplSpinor            = iScalar<iScalar<iVector<vtype, Dimension> > >;
    template <typename vtype> using iImplHalfSpinor        = iVector<iScalar<iVector<vtype, Dimension> >, Ngp>;
    template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;

    typedef iImplScalar<Simd>            SiteComplex;
    typedef iImplSpinor<Simd>            SiteSpinor;
    typedef iImplHalfSpinor<Simd>        SiteHalfSpinor;
    typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;

    typedef Lattice<SiteComplex>           ComplexField;
    typedef Lattice<SiteSpinor>            FermionField;
    typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;

    typedef SimpleCompressor<SiteSpinor> Compressor;
    typedef StaggeredImplParams ImplParams;
    typedef CartesianStencil<SiteSpinor, SiteSpinor> StencilImpl;

    ImplParams Params;

    StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};

    inline void multLink(SiteSpinor &phi,
                         const SiteDoubledGaugeField &U,
                         const SiteSpinor &chi,
                         int mu){
      mult(&phi(), &U(mu), &chi());
    }
    inline void multLinkAdd(SiteSpinor &phi,
                            const SiteDoubledGaugeField &U,
                            const SiteSpinor &chi,
                            int mu){
      mac(&phi(), &U(mu), &chi());
    }

    template <class ref>
    inline void loadLinkElement(Simd &reg, ref &memory) {
      reg = memory;
    }

    inline void DoubleStore(GridBase *GaugeGrid,
                            DoubledGaugeField &UUUds, // for Naik term
                            DoubledGaugeField &Uds,
                            const GaugeField &Uthin,
                            const GaugeField &Ufat) {
      conformable(Uds._grid,   GaugeGrid);
      conformable(Uthin._grid, GaugeGrid);
      conformable(Ufat._grid,  GaugeGrid);
      GaugeLinkField U(GaugeGrid);
      GaugeLinkField UU(GaugeGrid);
      GaugeLinkField UUU(GaugeGrid);
      GaugeLinkField Udag(GaugeGrid);
      GaugeLinkField UUUdag(GaugeGrid);
      for (int mu = 0; mu < Nd; mu++) {

        // Staggered Phase.
        Lattice<iScalar<vInteger> > coor(GaugeGrid);
        Lattice<iScalar<vInteger> > x(GaugeGrid); LatticeCoordinate(x,0);
        Lattice<iScalar<vInteger> > y(GaugeGrid); LatticeCoordinate(y,1);
        Lattice<iScalar<vInteger> > z(GaugeGrid); LatticeCoordinate(z,2);
        Lattice<iScalar<vInteger> > t(GaugeGrid); LatticeCoordinate(t,3);

        Lattice<iScalar<vInteger> > lin_z(GaugeGrid); lin_z=x+y;
        Lattice<iScalar<vInteger> > lin_t(GaugeGrid); lin_t=x+y+z;

        ComplexField phases(GaugeGrid); phases=1.0;

        if ( mu == 1 ) phases = where( mod(x    ,2)==(Integer)0, phases,-phases);
        if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
        if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);

        // 1 hop based on fat links
        U    = PeekIndex<LorentzIndex>(Ufat, mu);
        Udag = adj( Cshift(U, mu, -1));

        U    = U    *phases;
        Udag = Udag *phases;

        PokeIndex<LorentzIndex>(Uds, U,    mu);
        PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);

        // 3 hop based on thin links. Crazy huh ?
        U  = PeekIndex<LorentzIndex>(Uthin, mu);
        UU = Gimpl::CovShiftForward(U,mu,U);
        UUU= Gimpl::CovShiftForward(U,mu,UU);

        UUUdag = adj( Cshift(UUU, mu, -3));

        UUU    = UUU    *phases;
        UUUdag = UUUdag *phases;

        PokeIndex<LorentzIndex>(UUUds, UUU,    mu);
        PokeIndex<LorentzIndex>(UUUds, UUUdag, mu+4);

      }
    }

    inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
      GaugeLinkField link(mat._grid);
      link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
      PokeIndex<LorentzIndex>(mat,link,mu);
    }

    inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
      assert (0);
      // Must never hit
    }
  };
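The phases logic in DoubleStore above is the usual Kawamoto-Smit staggered phase assignment: eta_x = 1, eta_y = (-1)^x, eta_z = (-1)^(x+y), eta_t = (-1)^(x+y+z). The phase-dressed forward links are poked into slots mu of the doubled field and the adjoint, back-shifted links into slots mu+4; the same packing is repeated for the three-hop Naik links in UUUds. Below is a scalar restatement of that phase rule, written as a sketch independent of the Grid lattice types.

#include <array>

// Staggered (Kawamoto-Smit) phase for direction mu at site n=(x,y,z,t),
// matching the where(mod(...,2)==0, phases, -phases) assignments above:
//   mu=0 -> +1,  mu=1 -> (-1)^x,  mu=2 -> (-1)^(x+y),  mu=3 -> (-1)^(x+y+z)
inline int staggered_phase(int mu, const std::array<int,4> &n) {
  int s = 0;
  for (int nu = 0; nu < mu; nu++) s += n[nu];
  return (s % 2 == 0) ? +1 : -1;
}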
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour one component spinors with colour index. 5d vec
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
template <class S, class Representation = FundamentalRepresentation >
|
||||
class StaggeredVec5dImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||
|
||||
public:
|
||||
|
||||
typedef RealD _Coeff_t ;
|
||||
static const int Dimension = Representation::Dimension;
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
|
||||
//Necessary?
|
||||
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||
|
||||
const bool LsVectorised=true;
|
||||
|
||||
typedef _Coeff_t Coeff_t;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
|
||||
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nd>;
|
||||
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
|
||||
|
||||
// Make the doubled gauge field a *scalar*
|
||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
||||
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
||||
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef iImplScalar<Simd> SiteComplex;
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
|
||||
|
||||
typedef Lattice<SiteComplex> ComplexField;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
|
||||
typedef SimpleCompressor<SiteSpinor> Compressor;
|
||||
typedef StaggeredImplParams ImplParams;
|
||||
typedef CartesianStencil<SiteSpinor, SiteSpinor> StencilImpl;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
StaggeredVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
|
||||
template <class ref>
|
||||
inline void loadLinkElement(Simd &reg, ref &memory) {
|
||||
vsplat(reg, memory);
|
||||
}
|
||||
|
||||
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu) {
|
||||
SiteGaugeLink UU;
|
||||
for (int i = 0; i < Dimension; i++) {
|
||||
for (int j = 0; j < Dimension; j++) {
|
||||
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||
}
|
||||
}
|
||||
mult(&phi(), &UU(), &chi());
|
||||
}
|
||||
inline void multLinkAdd(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu) {
|
||||
SiteGaugeLink UU;
|
||||
for (int i = 0; i < Dimension; i++) {
|
||||
for (int j = 0; j < Dimension; j++) {
|
||||
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||
}
|
||||
}
|
||||
mac(&phi(), &UU(), &chi());
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,
|
||||
DoubledGaugeField &UUUds, // for Naik term
|
||||
DoubledGaugeField &Uds,
|
||||
const GaugeField &Uthin,
|
||||
const GaugeField &Ufat)
|
||||
{
|
||||
|
||||
GridBase * InputGrid = Uthin._grid;
|
||||
conformable(InputGrid,Ufat._grid);
|
||||
|
||||
GaugeLinkField U(InputGrid);
|
||||
GaugeLinkField UU(InputGrid);
|
||||
GaugeLinkField UUU(InputGrid);
|
||||
GaugeLinkField Udag(InputGrid);
|
||||
GaugeLinkField UUUdag(InputGrid);
|
||||
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
// Staggered Phase.
|
||||
Lattice<iScalar<vInteger> > coor(InputGrid);
|
||||
Lattice<iScalar<vInteger> > x(InputGrid); LatticeCoordinate(x,0);
|
||||
Lattice<iScalar<vInteger> > y(InputGrid); LatticeCoordinate(y,1);
|
||||
Lattice<iScalar<vInteger> > z(InputGrid); LatticeCoordinate(z,2);
|
||||
Lattice<iScalar<vInteger> > t(InputGrid); LatticeCoordinate(t,3);
|
||||
|
||||
Lattice<iScalar<vInteger> > lin_z(InputGrid); lin_z=x+y;
|
||||
Lattice<iScalar<vInteger> > lin_t(InputGrid); lin_t=x+y+z;
|
||||
|
||||
ComplexField phases(InputGrid); phases=1.0;
|
||||
|
||||
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
|
||||
|
||||
// 1 hop based on fat links
|
||||
U = PeekIndex<LorentzIndex>(Ufat, mu);
|
||||
Udag = adj( Cshift(U, mu, -1));
|
||||
|
||||
U = U *phases;
|
||||
Udag = Udag *phases;
|
||||
|
||||
|
||||
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||
SiteScalarGaugeLink ScalarU;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
peekLocalSite(ScalarUds, Uds, lcoor);
|
||||
|
||||
peekLocalSite(ScalarU, U, lcoor);
|
||||
ScalarUds(mu) = ScalarU();
|
||||
|
||||
peekLocalSite(ScalarU, Udag, lcoor);
|
||||
ScalarUds(mu + 4) = ScalarU();
|
||||
|
||||
pokeLocalSite(ScalarUds, Uds, lcoor);
|
||||
}
|
||||
|
||||
// 3 hop based on thin links. Crazy huh ?
|
||||
U = PeekIndex<LorentzIndex>(Uthin, mu);
|
||||
UU = Gimpl::CovShiftForward(U,mu,U);
|
||||
UUU= Gimpl::CovShiftForward(U,mu,UU);
|
||||
|
||||
UUUdag = adj( Cshift(UUU, mu, -3));
|
||||
|
||||
UUU = UUU *phases;
|
||||
UUUdag = UUUdag *phases;
|
||||
|
||||
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||
|
||||
SiteScalarGaugeLink ScalarU;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
|
||||
peekLocalSite(ScalarUds, UUUds, lcoor);
|
||||
|
||||
peekLocalSite(ScalarU, UUU, lcoor);
|
||||
ScalarUds(mu) = ScalarU();
|
||||
|
||||
peekLocalSite(ScalarU, UUUdag, lcoor);
|
||||
ScalarUds(mu + 4) = ScalarU();
|
||||
|
||||
pokeLocalSite(ScalarUds, UUUds, lcoor);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||
assert (0);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
|
||||
@ -540,6 +852,14 @@ PARALLEL_FOR_LOOP
|
||||
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
|
||||
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
|
||||
|
||||
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
|
||||
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
|
||||
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
|
||||
|
||||
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
|
||||
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
|
||||
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
lib/qcd/action/fermion/ImprovedStaggeredFermion.cc (new file)
@ -0,0 +1,356 @@
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
const std::vector<int>
|
||||
ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
|
||||
const std::vector<int>
|
||||
ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
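The two static tables above define the sixteen-point stencil of the improved operator: for each of the four directions there is a +1 hop, a -1 hop, a +3 (Naik) hop and a -3 hop, stored in blocks of four. The small sketch below regenerates the same tables, for illustration only.

#include <vector>

// Rebuild the npoint=16 stencil declared above:
//   entries  0..3  : displacement +1 in directions 0..3
//   entries  4..7  : displacement -1
//   entries  8..11 : displacement +3 (Naik term)
//   entries 12..15 : displacement -3
void make_staggered_stencil(std::vector<int> &directions, std::vector<int> &displacements) {
  const int hops[4] = {+1, -1, +3, -3};
  directions.clear(); displacements.clear();
  for (int h = 0; h < 4; h++) {
    for (int mu = 0; mu < 4; mu++) {
      directions.push_back(mu);
      displacements.push_back(hops[h]);
    }
  }
}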
|
||||
|
||||
/////////////////////////////////
|
||||
// Constructor and gauge import
|
||||
/////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1, RealD _c2,RealD _u0,
|
||||
const ImplParams &p)
|
||||
: Kernels(p),
|
||||
_grid(&Fgrid),
|
||||
_cbgrid(&Hgrid),
|
||||
Stencil(&Fgrid, npoint, Even, directions, displacements),
|
||||
StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even
|
||||
StencilOdd(&Hgrid, npoint, Odd, directions, displacements), // source is Odd
|
||||
mass(_mass),
|
||||
c1(_c1),
|
||||
c2(_c2),
|
||||
u0(_u0),
|
||||
Lebesgue(_grid),
|
||||
LebesgueEvenOdd(_cbgrid),
|
||||
Umu(&Fgrid),
|
||||
UmuEven(&Hgrid),
|
||||
UmuOdd(&Hgrid),
|
||||
UUUmu(&Fgrid),
|
||||
UUUmuEven(&Hgrid),
|
||||
UUUmuOdd(&Hgrid)
|
||||
{
|
||||
// Allocate the required comms buffer
|
||||
ImportGauge(_Uthin,_Ufat);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Momentum space propagator should be
|
||||
// https://arxiv.org/pdf/hep-lat/9712010.pdf
|
||||
//
|
||||
// mom space action.
|
||||
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
|
||||
//
|
||||
// must track through staggered flavour/spin reduction in literature to
|
||||
// turn to free propagator for the one component chi field, a la page 4/5
|
||||
// of the above link to implement a Fourier-based solver.
|
||||
////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin)
|
||||
{
|
||||
ImportGauge(_Uthin,_Uthin);
|
||||
};
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat)
|
||||
{
|
||||
GaugeLinkField U(GaugeGrid());
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Double Store should take two fields for Naik and one hop separately.
|
||||
////////////////////////////////////////////////////////
|
||||
Impl::DoubleStore(GaugeGrid(), UUUmu, Umu, _Uthin, _Ufat );
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Apply scale factors to get the right fermion Kinetic term
|
||||
// Could pass coeffs into the double store to save work.
|
||||
// 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) )
|
||||
////////////////////////////////////////////////////////
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
PokeIndex<LorentzIndex>(Umu, U*( 0.5*c1/u0), mu );
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu+4);
|
||||
PokeIndex<LorentzIndex>(Umu, U*(-0.5*c1/u0), mu+4);
|
||||
|
||||
U = PeekIndex<LorentzIndex>(UUUmu, mu);
|
||||
PokeIndex<LorentzIndex>(UUUmu, U*( 0.5*c2/u0/u0/u0), mu );
|
||||
|
||||
U = PeekIndex<LorentzIndex>(UUUmu, mu+4);
|
||||
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
|
||||
}
|
||||
|
||||
pickCheckerboard(Even, UmuEven, Umu);
|
||||
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||
pickCheckerboard(Even, UUUmuEven, UUUmu);
|
||||
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
|
||||
}
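Written out, the rescaling above arranges that applying the doubled fields with unit coefficients reproduces the improved one-component derivative; the comment block below restates the formulas already sketched in this function (c1 = 9/8 and c2 = -1/24 are the defaults, u0 the tadpole factor).

// Kinetic term reproduced by the scaled links (sum over mu implied):
//   D chi(x) =  ( c1/(2 u0)   ) [ U_mu(x)   chi(x+mu)  - U_mu^dag(x-mu)    chi(x-mu)  ]
//            +  ( c2/(2 u0^3) ) [ UUU_mu(x) chi(x+3mu) - UUU_mu^dag(x-3mu) chi(x-3mu) ]
// i.e. slot mu carries +0.5*c/u0^n times the forward link and slot mu+4 carries
// -0.5*c/u0^n times the backward (adjoint, shifted) link, as poked above.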
|
||||
|
||||
/////////////////////////////
|
||||
// Implement the interface
|
||||
/////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
RealD ImprovedStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
Dhop(in, out, DaggerNo);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
RealD ImprovedStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
Dhop(in, out, DaggerYes);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||
if (in.checkerboard == Odd) {
|
||||
DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerNo);
|
||||
}
|
||||
}
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||
if (in.checkerboard == Odd) {
|
||||
DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerYes);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
typename FermionField::scalar_type scal(mass);
|
||||
out = scal * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
Mooee(in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
out = (1.0 / (mass)) * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in,
|
||||
FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
MooeeInv(in, out);
|
||||
}
|
||||
|
||||
///////////////////////////////////
|
||||
// Internal
|
||||
///////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
GaugeField & mat,
|
||||
const FermionField &A, const FermionField &B, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
Compressor compressor;
|
||||
|
||||
FermionField Btilde(B._grid);
|
||||
FermionField Atilde(B._grid);
|
||||
Atilde = A;
|
||||
|
||||
st.HaloExchange(B, compressor);
|
||||
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
////////////////////////
|
||||
// Call the single hop
|
||||
////////////////////////
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < B._grid->oSites(); sss++) {
|
||||
Kernels::DhopDir(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1);
|
||||
}
|
||||
|
||||
// Force in three link terms
|
||||
//
|
||||
// Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
||||
//
|
||||
// dU_ac(x)/dt = i p_ab U_bc(x)
|
||||
//
|
||||
// => dS_f/dt = dS_f/dU_ac(x) . dU_ac(x)/dt = i p_ab U_bc(x) dS_f/dU_ac(x)
|
||||
//
|
||||
// One link: form fragments S_f = A U B
|
||||
//
|
||||
// write Btilde = U(x) B(x+mu)
|
||||
//
|
||||
// mat+= TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
||||
//
|
||||
// Three link: form fragments S_f = A UUU B
|
||||
//
|
||||
// mat+= outer ( A, UUUB)  <-- Best take DhopDeriv with one link or identity matrix
|
||||
// mat+= outer ( AU, UUB) <-- and then use covariant cshift?
|
||||
// mat+= outer ( AUU, UB) <-- Returned from call to DhopDir
|
||||
|
||||
assert(0);// need to figure out the force interface with a blasted three link term.
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
conformable(U._grid, _grid);
|
||||
conformable(U._grid, V._grid);
|
||||
conformable(U._grid, mat._grid);
|
||||
|
||||
mat.checkerboard = U.checkerboard;
|
||||
|
||||
DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
conformable(U._grid, _cbgrid);
|
||||
conformable(U._grid, V._grid);
|
||||
conformable(U._grid, mat._grid);
|
||||
|
||||
assert(V.checkerboard == Even);
|
||||
assert(U.checkerboard == Odd);
|
||||
mat.checkerboard = Odd;
|
||||
|
||||
DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
conformable(U._grid, _cbgrid);
|
||||
conformable(U._grid, V._grid);
|
||||
conformable(U._grid, mat._grid);
|
||||
|
||||
assert(V.checkerboard == Odd);
|
||||
assert(U.checkerboard == Even);
|
||||
mat.checkerboard = Even;
|
||||
|
||||
DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
||||
conformable(in._grid, _grid); // verifies full grid
|
||||
conformable(in._grid, out._grid);
|
||||
|
||||
out.checkerboard = in.checkerboard;
|
||||
|
||||
DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
||||
conformable(in._grid, _cbgrid); // verifies half grid
|
||||
conformable(in._grid, out._grid); // drops the cb check
|
||||
|
||||
assert(in.checkerboard == Even);
|
||||
out.checkerboard = Odd;
|
||||
|
||||
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag) {
|
||||
conformable(in._grid, _cbgrid); // verifies half grid
|
||||
conformable(in._grid, out._grid); // drops the cb check
|
||||
|
||||
assert(in.checkerboard == Odd);
|
||||
out.checkerboard = Even;
|
||||
|
||||
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
|
||||
DhopDir(in, out, dir, disp);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
|
||||
|
||||
Compressor compressor;
|
||||
Stencil.HaloExchange(in, compressor);
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp);
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
Compressor compressor;
|
||||
st.HaloExchange(in, compressor);
|
||||
|
||||
if (dag == DaggerYes) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion);
|
||||
|
||||
//AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion);
|
||||
//TwoIndexFermOpTemplateInstantiate(ImprovedStaggeredFermion);
|
||||
|
||||
}}
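Taken together, Meooe, Mooee (= mass on a checkerboard) and MooeeInv (= 1/mass) give the operator the standard even-odd block structure that red-black preconditioned solvers rely on. The comment block below is a generic reminder of how those pieces combine, not code from this commit.

// Even-odd block form implied by the interface implemented in this file:
//        M  = [  m    Meo ]        Mee = Moo = m      (Mooee)
//             [ Moe    m  ]        Meo, Moe           (Meooe on the half grids)
// Schur-preconditioned operator on one checkerboard:
//        Mhat = m - Moe (1/m) Meo          (uses only Meooe, Mooee and MooeeInv)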
|
lib/qcd/action/fermion/ImprovedStaggeredFermion.h (new file)
@ -0,0 +1,155 @@
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_IMPR_STAG_FERMION_H
|
||||
#define GRID_QCD_IMPR_STAG_FERMION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
namespace QCD {
|
||||
|
||||
class ImprovedStaggeredFermionStatic {
|
||||
public:
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
static const int npoint = 16;
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedStaggeredFermionStatic {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef StaggeredKernels<Impl> Kernels;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
GridBase *GaugeGrid(void) { return _grid; }
|
||||
GridBase *GaugeRedBlackGrid(void) { return _cbgrid; }
|
||||
GridBase *FermionGrid(void) { return _grid; }
|
||||
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// override multiply; cut the number of routines by passing a dagger argument
// and also make the interface more uniformly consistent
|
||||
//////////////////////////////////////////////////////////////////
|
||||
RealD M(const FermionField &in, FermionField &out);
|
||||
RealD Mdag(const FermionField &in, FermionField &out);
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// half checkerboard operations
|
||||
/////////////////////////////////////////////////////////
|
||||
void Meooe(const FermionField &in, FermionField &out);
|
||||
void MeooeDag(const FermionField &in, FermionField &out);
|
||||
void Mooee(const FermionField &in, FermionField &out);
|
||||
void MooeeDag(const FermionField &in, FermionField &out);
|
||||
void MooeeInv(const FermionField &in, FermionField &out);
|
||||
void MooeeInvDag(const FermionField &in, FermionField &out);
|
||||
|
||||
////////////////////////
|
||||
// Derivative interface
|
||||
////////////////////////
|
||||
// Interface calls an internal routine
|
||||
void DhopDeriv (GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// non-hermitian hopping term; half cb or both
|
||||
///////////////////////////////////////////////////////////////
|
||||
void Dhop (const FermionField &in, FermionField &out, int dag);
|
||||
void DhopOE(const FermionField &in, FermionField &out, int dag);
|
||||
void DhopEO(const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Multigrid assistance; force term uses too
|
||||
///////////////////////////////////////////////////////////////
|
||||
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Extra methods added by derived
|
||||
///////////////////////////////////////////////////////////////
|
||||
void DerivInternal(StencilImpl &st,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
GaugeField &mat,
|
||||
const FermionField &A, const FermionField &B, int dag);
|
||||
|
||||
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
// Constructor
|
||||
ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0,
|
||||
const ImplParams &p = ImplParams());
|
||||
|
||||
// DoubleStore impl dependent
|
||||
void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat);
|
||||
void ImportGauge(const GaugeField &_Uthin);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members required to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
// protected:
|
||||
public:
|
||||
// any other parameters of action ???
|
||||
|
||||
RealD mass;
|
||||
RealD u0;
|
||||
RealD c1;
|
||||
RealD c2;
|
||||
|
||||
GridBase *_grid;
|
||||
GridBase *_cbgrid;
|
||||
|
||||
// Defines the stencils for even and odd
|
||||
StencilImpl Stencil;
|
||||
StencilImpl StencilEven;
|
||||
StencilImpl StencilOdd;
|
||||
|
||||
// Copy of the gauge field , with even and odd subsets
|
||||
DoubledGaugeField Umu;
|
||||
DoubledGaugeField UmuEven;
|
||||
DoubledGaugeField UmuOdd;
|
||||
|
||||
DoubledGaugeField UUUmu;
|
||||
DoubledGaugeField UUUmuEven;
|
||||
DoubledGaugeField UUUmuOdd;
|
||||
|
||||
LebesgueOrder Lebesgue;
|
||||
LebesgueOrder LebesgueEvenOdd;
|
||||
};
|
||||
|
||||
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
||||
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc (new file)
@ -0,0 +1,356 @@
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#include <PerfCount.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
const std::vector<int>
|
||||
ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4});
|
||||
const std::vector<int>
|
||||
ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
|
||||
|
||||
// 5d lattice for DWF.
|
||||
template<class Impl>
|
||||
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,GaugeField &_Ufat,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,
|
||||
RealD _c1,RealD _c2, RealD _u0,
|
||||
const ImplParams &p) :
|
||||
Kernels(p),
|
||||
_FiveDimGrid (&FiveDimGrid),
|
||||
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
||||
_FourDimGrid (&FourDimGrid),
|
||||
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
||||
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
||||
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
||||
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
|
||||
mass(_mass),
|
||||
c1(_c1),
|
||||
c2(_c2),
|
||||
u0(_u0),
|
||||
Umu(_FourDimGrid),
|
||||
UmuEven(_FourDimRedBlackGrid),
|
||||
UmuOdd (_FourDimRedBlackGrid),
|
||||
UUUmu(_FourDimGrid),
|
||||
UUUmuEven(_FourDimRedBlackGrid),
|
||||
UUUmuOdd(_FourDimRedBlackGrid),
|
||||
Lebesgue(_FourDimGrid),
|
||||
LebesgueEvenOdd(_FourDimRedBlackGrid)
|
||||
{
|
||||
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
assert(FourDimRedBlackGrid._ndimension==4);
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
||||
|
||||
// extent of fifth dim and not spread out
|
||||
Ls=FiveDimGrid._fdimensions[0];
|
||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||
assert(FiveDimGrid._processors[0] ==1);
|
||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<4;d++){
|
||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
||||
|
||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
||||
|
||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
||||
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
||||
}
|
||||
|
||||
if (Impl::LsVectorised) {
|
||||
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||
|
||||
for(int d=0;d<4;d++){
|
||||
      assert(FourDimGrid._simd_layout[d]==1);
      assert(FourDimRedBlackGrid._simd_layout[d]==1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
||||
assert(FiveDimGrid._simd_layout[0] ==1);
|
||||
|
||||
}
|
||||
|
||||
// Allocate the required comms buffer
|
||||
ImportGauge(_Uthin,_Ufat);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin)
|
||||
{
|
||||
ImportGauge(_Uthin,_Uthin);
|
||||
};
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat)
|
||||
{
|
||||
////////////////////////////////////////////////////////
|
||||
// Double Store should take two fields for Naik and one hop separately.
|
||||
////////////////////////////////////////////////////////
|
||||
Impl::DoubleStore(GaugeGrid(), UUUmu, Umu, _Uthin, _Ufat );
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Apply scale factors to get the right fermion Kinetic term
|
||||
// Could pass coeffs into the double store to save work.
|
||||
// 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) )
|
||||
////////////////////////////////////////////////////////
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
auto U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
PokeIndex<LorentzIndex>(Umu, U*( 0.5*c1/u0), mu );
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu+4);
|
||||
PokeIndex<LorentzIndex>(Umu, U*(-0.5*c1/u0), mu+4);
|
||||
|
||||
U = PeekIndex<LorentzIndex>(UUUmu, mu);
|
||||
PokeIndex<LorentzIndex>(UUUmu, U*( 0.5*c2/u0/u0/u0), mu );
|
||||
|
||||
U = PeekIndex<LorentzIndex>(UUUmu, mu+4);
|
||||
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
|
||||
}
|
||||
|
||||
pickCheckerboard(Even, UmuEven, Umu);
|
||||
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||
pickCheckerboard(Even, UUUmuEven, UUUmu);
|
||||
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
|
||||
{
|
||||
int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
|
||||
// we drop off the innermost fifth dimension
|
||||
|
||||
Compressor compressor;
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp);
|
||||
}
|
||||
}
|
||||
};
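In the 5D variant the s coordinate is innermost and carries no gauge field, so every 4D site sU expands into Ls fermion sites addressed as sF = s + Ls*sU, which is the flattening used in the loop above. A minimal sketch of that index mapping, for illustration only:

// Flattened fermion index used by the 5D loops above; s = 0..Ls-1 is innermost.
inline int fermion_site(int s, int sU, int Ls) { return s + Ls*sU; }  // sF
inline int s_of (int sF, int Ls) { return sF % Ls; }
inline int sU_of(int sF, int Ls) { return sF / Ls; }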
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
DoubledGaugeField & UUU,
|
||||
GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
// No force terms in multi-rhs solver staggered
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopDeriv(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
Compressor compressor;
|
||||
int LLs = in._grid->_rdimensions[0];
|
||||
st.HaloExchange(in,compressor);
|
||||
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
if (dag == DaggerYes) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU=ss;
|
||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU=ss;
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
assert(in.checkerboard==Even);
|
||||
out.checkerboard = Odd;
|
||||
|
||||
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag);
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
assert(in.checkerboard==Odd);
|
||||
out.checkerboard = Even;
|
||||
|
||||
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag);
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
conformable(in._grid,FermionGrid()); // verifies full grid
|
||||
conformable(in._grid,out._grid);
|
||||
|
||||
out.checkerboard = in.checkerboard;
|
||||
|
||||
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Implement the general interface. Here we use SAME mass on all slices
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
|
||||
DhopDir(in, out, dir, disp);
|
||||
}
|
||||
template <class Impl>
|
||||
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
Dhop(in, out, DaggerNo);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out) {
  if (in.checkerboard == Odd) {
    DhopEO(in, out, DaggerNo);
  } else {
    DhopOE(in, out, DaggerNo);
  }
}
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
  if (in.checkerboard == Odd) {
    DhopEO(in, out, DaggerYes);
  } else {
    DhopOE(in, out, DaggerYes);
  }
}

template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out) {
  out.checkerboard = in.checkerboard;
  typename FermionField::scalar_type scal(mass);
  out = scal * in;
}

template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
  out.checkerboard = in.checkerboard;
  Mooee(in, out);
}

template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
  out.checkerboard = in.checkerboard;
  out = (1.0 / (mass)) * in;
}

template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MooeeInvDag(const FermionField &in,
                                                   FermionField &out) {
  out.checkerboard = in.checkerboard;
  MooeeInv(in, out);
}

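// Note (sketch, not additional source): on the red-black grids the routines above give
// the operator its usual 2x2 checkerboard block form,
//
//         [  m    Meo ]     Mee = Moo = m * 1           (Mooee / MooeeDag)
//   M  =  [           ]     Meo, Moe  = Dhop restricted (Meooe via DhopEO / DhopOE)
//         [ Moe    m  ]                 to one parity
//
// so MooeeInv is just multiplication by 1/m, exactly as coded, which is the structure
// the Schur red-black preconditioned solvers rely on.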
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion5D);
FermOpStaggeredVec5dTemplateInstantiate(ImprovedStaggeredFermion5D);

}}

164
lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h
Normal file
@ -0,0 +1,164 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: AzusaYamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H
#define GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H

namespace Grid {
namespace QCD {

  ////////////////////////////////////////////////////////////////////////////////
  // This is the 4d red black case appropriate to support
  ////////////////////////////////////////////////////////////////////////////////

  class ImprovedStaggeredFermion5DStatic {
  public:
    // S-direction is INNERMOST and takes no part in the parity.
    static const std::vector<int> directions;
    static const std::vector<int> displacements;
    const int npoint = 16;
  };
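  // Note: npoint = 16 corresponds to the eight one-link hops (displacement +/-1 in each
  // of x,y,z,t) plus the eight three-link Naik hops (displacement +/-3).  The kernels in
  // StaggeredKernels.cc below index the second set with a skew of 8, and DhopDir expects
  // disp to be one of +1,-1,+3,-3.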
  template<class Impl>
  class ImprovedStaggeredFermion5D : public StaggeredKernels<Impl>, public ImprovedStaggeredFermion5DStatic
  {
  public:
    INHERIT_IMPL_TYPES(Impl);
    typedef StaggeredKernels<Impl> Kernels;

    ///////////////////////////////////////////////////////////////
    // Implement the abstract base
    ///////////////////////////////////////////////////////////////
    GridBase *GaugeGrid(void)           { return _FourDimGrid ;}
    GridBase *GaugeRedBlackGrid(void)   { return _FourDimRedBlackGrid ;}
    GridBase *FermionGrid(void)         { return _FiveDimGrid;}
    GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}

    // full checkerboard operations; leave unimplemented as abstract for now
    RealD M    (const FermionField &in, FermionField &out);
    RealD Mdag (const FermionField &in, FermionField &out);

    // half checkerboard operations
    void Meooe    (const FermionField &in, FermionField &out);
    void Mooee    (const FermionField &in, FermionField &out);
    void MooeeInv (const FermionField &in, FermionField &out);

    void MeooeDag    (const FermionField &in, FermionField &out);
    void MooeeDag    (const FermionField &in, FermionField &out);
    void MooeeInvDag (const FermionField &in, FermionField &out);

    void Mdir   (const FermionField &in, FermionField &out, int dir, int disp);
    void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);

    // These can be overridden by fancy 5d chiral actions
    void DhopDeriv  (GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
    void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
    void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);

    // Implement the non-hermitian hopping term; half cb or both
    void Dhop  (const FermionField &in, FermionField &out, int dag);
    void DhopOE(const FermionField &in, FermionField &out, int dag);
    void DhopEO(const FermionField &in, FermionField &out, int dag);

    ///////////////////////////////////////////////////////////////
    // New methods added
    ///////////////////////////////////////////////////////////////
    void DerivInternal(StencilImpl & st,
                       DoubledGaugeField & U,
                       DoubledGaugeField & UUU,
                       GaugeField &mat,
                       const FermionField &A,
                       const FermionField &B,
                       int dag);

    void DhopInternal(StencilImpl & st,
                      LebesgueOrder &lo,
                      DoubledGaugeField &U,
                      DoubledGaugeField &UUU,
                      const FermionField &in,
                      FermionField &out,
                      int dag);

    // Constructors
    ImprovedStaggeredFermion5D(GaugeField &_Uthin,
                               GaugeField &_Ufat,
                               GridCartesian         &FiveDimGrid,
                               GridRedBlackCartesian &FiveDimRedBlackGrid,
                               GridCartesian         &FourDimGrid,
                               GridRedBlackCartesian &FourDimRedBlackGrid,
                               double _mass,
                               RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0, RealD _u0=1.0,
                               const ImplParams &p= ImplParams());

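    // Sketch of the improved operator (a note, not additional source; staggered phase and
    // sign conventions folded into the stored links are not spelled out here): the defaults
    // c1 = 9/8 and c2 = -1/24 are the standard one-link and three-link (Naik) improvement
    // coefficients, with u0 presumably a tadpole factor applied in ImportGauge.
    // Schematically, for the one-link field U (Umu) and three-link field W (UUUmu),
    //
    //   (D psi)(x) = sum_mu [ c1 ( U_mu(x) psi(x+mu)  - U_mu^dag(x-mu)  psi(x-mu)  )
    //                       + c2 ( W_mu(x) psi(x+3mu) - W_mu^dag(x-3mu) psi(x-3mu) ) ]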
    // DoubleStore
    void ImportGauge(const GaugeField &_U);
    void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat);

    ///////////////////////////////////////////////////////////////
    // Data members required to support the functionality
    ///////////////////////////////////////////////////////////////
  public:

    GridBase *_FourDimGrid;
    GridBase *_FourDimRedBlackGrid;
    GridBase *_FiveDimGrid;
    GridBase *_FiveDimRedBlackGrid;

    RealD mass;
    RealD c1;
    RealD c2;
    RealD u0;
    int Ls;

    // Defines the stencils for even and odd
    StencilImpl Stencil;
    StencilImpl StencilEven;
    StencilImpl StencilOdd;

    // Copy of the gauge field, with even and odd subsets
    DoubledGaugeField Umu;
    DoubledGaugeField UmuEven;
    DoubledGaugeField UmuOdd;

    DoubledGaugeField UUUmu;
    DoubledGaugeField UUUmuEven;
    DoubledGaugeField UUUmuOdd;

    LebesgueOrder Lebesgue;
    LebesgueOrder LebesgueEvenOdd;

    // Comms buffer
    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > comm_buf;

  };

}}

#endif
273
lib/qcd/action/fermion/StaggeredKernels.cc
Normal file
@ -0,0 +1,273 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/StaggeredKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
int StaggeredKernelsStatic::Opt;
|
||||
|
||||
template <class Impl>
|
||||
StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Generic implementation; move to different file?
|
||||
////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int skew = 0;
|
||||
if (threeLink) skew=8;
|
||||
///////////////////////////
|
||||
// Xp
|
||||
///////////////////////////
|
||||
|
||||
SE = st.GetEntry(ptype, Xp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp);
|
||||
|
||||
///////////////////////////
|
||||
// Yp
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Yp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Yp);
|
||||
|
||||
///////////////////////////
|
||||
// Zp
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Zp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zp);
|
||||
|
||||
///////////////////////////
|
||||
// Tp
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Tp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tp);
|
||||
|
||||
///////////////////////////
|
||||
// Xm
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Xm+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Xm);
|
||||
|
||||
///////////////////////////
|
||||
// Ym
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Ym+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Ym);
|
||||
|
||||
///////////////////////////
|
||||
// Zm
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Zm+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zm);
|
||||
|
||||
///////////////////////////
|
||||
// Tm
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Tm+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tm);
|
||||
|
||||
vstream(out, Uchi);
|
||||
};
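// Each of the eight direction blocks above repeats the same gather step: look the
// neighbour up in the stencil, take it from the local field (permuting when the SIMD
// layout requires it) or from the comms buffer.  A hypothetical helper factoring that
// step out could look roughly like this (a sketch, not part of the Grid sources):
template <class Impl>
static const typename Impl::SiteSpinor *
GatherNeighbour(typename Impl::StencilImpl &st, int point, int sF,
                const typename Impl::FermionField &in,
                typename Impl::SiteSpinor *buf,
                typename Impl::SiteSpinor &tmp)
{
  int ptype;
  StencilEntry *SE = st.GetEntry(ptype, point, sF);
  if (SE->_is_local) {
    if (SE->_permute) { permute(tmp, in._odata[SE->_offset], ptype); return &tmp; }
    return &in._odata[SE->_offset];
  }
  return &buf[SE->_offset];   // halo already exchanged into the comm buffer by the caller
}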
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out) {
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
|
||||
int oneLink =0;
|
||||
int threeLink=1;
|
||||
int dag=1;
|
||||
switch(Opt) {
|
||||
#ifdef AVX512
|
||||
//FIXME; move the sign into the Asm routine
|
||||
case OptInlineAsm:
|
||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
||||
for(int s=0;s<LLs;s++) {
|
||||
int sF=s+LLs*sU;
|
||||
out._odata[sF]=-out._odata[sF];
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
break;
|
||||
case OptGeneric:
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||
out._odata[sF] =-naive-naik;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
};
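// Note: the dagger kernel differs from DhopSite only by an overall sign.  The staggered
// hopping term is anti-Hermitian, Dhop^dag = -Dhop, so the generic path forms
// -(naive + naik) and the AVX512 path negates the assembler result afterwards (the
// FIXME above about moving the sign into the asm routine).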
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
int oneLink =0;
|
||||
int threeLink=1;
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
|
||||
int dag=0;
|
||||
switch(Opt) {
|
||||
#ifdef AVX512
|
||||
case OptInlineAsm:
|
||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
break;
|
||||
case OptGeneric:
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
// assert(sF<in._odata.size());
|
||||
// assert(sU< U._odata.size());
|
||||
// assert(sF>=0); assert(sU>=0);
|
||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||
out._odata[sF] =naive+naik;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out, int dir, int disp)
|
||||
{
|
||||
// Disp should be either +1,-1,+3,-3
|
||||
// What about "dag" ?
|
||||
// Because we work out pU . dS/dU
|
||||
// U
|
||||
assert(0);
|
||||
}
|
||||
|
||||
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
|
||||
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
|
||||
|
||||
}}
|
||||
|
83
lib/qcd/action/fermion/StaggeredKernels.h
Normal file
@ -0,0 +1,83 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/StaggeredKernels.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_STAGGERED_KERNELS_H
#define GRID_QCD_STAGGERED_KERNELS_H

namespace Grid {
namespace QCD {

  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Helper routines that implement the staggered stencil for a single site.
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  class StaggeredKernelsStatic {
  public:
    enum { OptGeneric, OptHandUnroll, OptInlineAsm };
    // S-direction is INNERMOST and takes no part in the parity.
    static int Opt;  // these are a temporary hack
  };

  template<class Impl> class StaggeredKernels : public FermionOperator<Impl>, public StaggeredKernelsStatic {
  public:

    INHERIT_IMPL_TYPES(Impl);
    typedef FermionOperator<Impl> Base;

  public:

    void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
                 int sF, int sU, const FermionField &in, FermionField &out, int dir, int disp);

    void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor *buf,
                       int sF, int sU, const FermionField &in, SiteSpinor &out, int threeLink);

    void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor *buf,
                           int sF, int sU, const FermionField &in, SiteSpinor &out, int threeLink);

    void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
                      int LLs, int sU, const FermionField &in, FermionField &out, int dag);

    void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
                     int LLs, int sU, const FermionField &in, FermionField &out);

    void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
                  int LLs, int sU, const FermionField &in, FermionField &out);

    void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
                     int LLs, int sU, const FermionField &in, FermionField &out);

  public:

    StaggeredKernels(const ImplParams &p = ImplParams());

  };

}}

#endif
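// Usage sketch (names taken from the enum above): the kernel variant is selected
// through the static member, e.g.
//
//   StaggeredKernelsStatic::Opt = StaggeredKernelsStatic::OptHandUnroll;
//
// OptInlineAsm is only compiled in when Grid is built with AVX512; without it that
// case falls through to the assert(0) default in DhopSite / DhopSiteDag.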
910
lib/qcd/action/fermion/StaggeredKernelsAsm.cc
Normal file
@ -0,0 +1,910 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/StaggeredKernelsAsm.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#include <simd/Intel512common.h>
|
||||
#include <simd/Intel512avx.h>
|
||||
|
||||
// Interleave operations from two directions
|
||||
// This looks just like a 2 spin multiply and reuse same sequence from the Wilson
|
||||
// Kernel. But the spin index becomes a mu index instead.
|
||||
#define Chi_00 %zmm0
|
||||
#define Chi_01 %zmm1
|
||||
#define Chi_02 %zmm2
|
||||
#define Chi_10 %zmm3
|
||||
#define Chi_11 %zmm4
|
||||
#define Chi_12 %zmm5
|
||||
#define Chi_20 %zmm6
|
||||
#define Chi_21 %zmm7
|
||||
#define Chi_22 %zmm8
|
||||
#define Chi_30 %zmm9
|
||||
#define Chi_31 %zmm10
|
||||
#define Chi_32 %zmm11
|
||||
|
||||
#define UChi_00 %zmm12
|
||||
#define UChi_01 %zmm13
|
||||
#define UChi_02 %zmm14
|
||||
#define UChi_10 %zmm15
|
||||
#define UChi_11 %zmm16
|
||||
#define UChi_12 %zmm17
|
||||
#define UChi_20 %zmm18
|
||||
#define UChi_21 %zmm19
|
||||
#define UChi_22 %zmm20
|
||||
#define UChi_30 %zmm21
|
||||
#define UChi_31 %zmm22
|
||||
#define UChi_32 %zmm23
|
||||
|
||||
#define pChi_00 %%zmm0
|
||||
#define pChi_01 %%zmm1
|
||||
#define pChi_02 %%zmm2
|
||||
#define pChi_10 %%zmm3
|
||||
#define pChi_11 %%zmm4
|
||||
#define pChi_12 %%zmm5
|
||||
#define pChi_20 %%zmm6
|
||||
#define pChi_21 %%zmm7
|
||||
#define pChi_22 %%zmm8
|
||||
#define pChi_30 %%zmm9
|
||||
#define pChi_31 %%zmm10
|
||||
#define pChi_32 %%zmm11
|
||||
|
||||
#define pUChi_00 %%zmm12
|
||||
#define pUChi_01 %%zmm13
|
||||
#define pUChi_02 %%zmm14
|
||||
#define pUChi_10 %%zmm15
|
||||
#define pUChi_11 %%zmm16
|
||||
#define pUChi_12 %%zmm17
|
||||
#define pUChi_20 %%zmm18
|
||||
#define pUChi_21 %%zmm19
|
||||
#define pUChi_22 %%zmm20
|
||||
#define pUChi_30 %%zmm21
|
||||
#define pUChi_31 %%zmm22
|
||||
#define pUChi_32 %%zmm23
|
||||
|
||||
#define T0 %zmm24
|
||||
#define T1 %zmm25
|
||||
#define T2 %zmm26
|
||||
#define T3 %zmm27
|
||||
|
||||
#define Z00 %zmm26
|
||||
#define Z10 %zmm27
|
||||
#define Z0 Z00
|
||||
#define Z1 %zmm28
|
||||
#define Z2 %zmm29
|
||||
|
||||
#define Z3 %zmm30
|
||||
#define Z4 %zmm31
|
||||
#define Z5 Chi_31
|
||||
#define Z6 Chi_32
|
||||
|
||||
#define MULT_ADD_LS(g0,g1,g2,g3) \
|
||||
asm ( "movq %0, %%r8 \n\t" \
|
||||
"movq %1, %%r9 \n\t" \
|
||||
"movq %2, %%r10 \n\t" \
|
||||
"movq %3, %%r11 \n\t" : : "r"(g0), "r"(g1), "r"(g2), "r"(g3) : "%r8","%r9","%r10","%r11" );\
|
||||
asm ( \
|
||||
VSHUF(Chi_00,T0) VSHUF(Chi_10,T1) \
|
||||
VSHUF(Chi_20,T2) VSHUF(Chi_30,T3) \
|
||||
VMADDSUBIDUP(0,%r8,T0,UChi_00) VMADDSUBIDUP(0,%r9,T1,UChi_10) \
|
||||
VMADDSUBIDUP(3,%r8,T0,UChi_01) VMADDSUBIDUP(3,%r9,T1,UChi_11) \
|
||||
VMADDSUBIDUP(6,%r8,T0,UChi_02) VMADDSUBIDUP(6,%r9,T1,UChi_12) \
|
||||
VMADDSUBIDUP(0,%r10,T2,UChi_20) VMADDSUBIDUP(0,%r11,T3,UChi_30) \
|
||||
VMADDSUBIDUP(3,%r10,T2,UChi_21) VMADDSUBIDUP(3,%r11,T3,UChi_31) \
|
||||
VMADDSUBIDUP(6,%r10,T2,UChi_22) VMADDSUBIDUP(6,%r11,T3,UChi_32) \
|
||||
VMADDSUBRDUP(0,%r8,Chi_00,UChi_00) VMADDSUBRDUP(0,%r9,Chi_10,UChi_10) \
|
||||
VMADDSUBRDUP(3,%r8,Chi_00,UChi_01) VMADDSUBRDUP(3,%r9,Chi_10,UChi_11) \
|
||||
VMADDSUBRDUP(6,%r8,Chi_00,UChi_02) VMADDSUBRDUP(6,%r9,Chi_10,UChi_12) \
|
||||
VMADDSUBRDUP(0,%r10,Chi_20,UChi_20) VMADDSUBRDUP(0,%r11,Chi_30,UChi_30) \
|
||||
VMADDSUBRDUP(3,%r10,Chi_20,UChi_21) VMADDSUBRDUP(3,%r11,Chi_30,UChi_31) \
|
||||
VMADDSUBRDUP(6,%r10,Chi_20,UChi_22) VMADDSUBRDUP(6,%r11,Chi_30,UChi_32) \
|
||||
VSHUF(Chi_01,T0) VSHUF(Chi_11,T1) \
|
||||
VSHUF(Chi_21,T2) VSHUF(Chi_31,T3) \
|
||||
VMADDSUBIDUP(1,%r8,T0,UChi_00) VMADDSUBIDUP(1,%r9,T1,UChi_10) \
|
||||
VMADDSUBIDUP(4,%r8,T0,UChi_01) VMADDSUBIDUP(4,%r9,T1,UChi_11) \
|
||||
VMADDSUBIDUP(7,%r8,T0,UChi_02) VMADDSUBIDUP(7,%r9,T1,UChi_12) \
|
||||
VMADDSUBIDUP(1,%r10,T2,UChi_20) VMADDSUBIDUP(1,%r11,T3,UChi_30) \
|
||||
VMADDSUBIDUP(4,%r10,T2,UChi_21) VMADDSUBIDUP(4,%r11,T3,UChi_31) \
|
||||
VMADDSUBIDUP(7,%r10,T2,UChi_22) VMADDSUBIDUP(7,%r11,T3,UChi_32) \
|
||||
VMADDSUBRDUP(1,%r8,Chi_01,UChi_00) VMADDSUBRDUP(1,%r9,Chi_11,UChi_10) \
|
||||
VMADDSUBRDUP(4,%r8,Chi_01,UChi_01) VMADDSUBRDUP(4,%r9,Chi_11,UChi_11) \
|
||||
VMADDSUBRDUP(7,%r8,Chi_01,UChi_02) VMADDSUBRDUP(7,%r9,Chi_11,UChi_12) \
|
||||
VMADDSUBRDUP(1,%r10,Chi_21,UChi_20) VMADDSUBRDUP(1,%r11,Chi_31,UChi_30) \
|
||||
VMADDSUBRDUP(4,%r10,Chi_21,UChi_21) VMADDSUBRDUP(4,%r11,Chi_31,UChi_31) \
|
||||
VMADDSUBRDUP(7,%r10,Chi_21,UChi_22) VMADDSUBRDUP(7,%r11,Chi_31,UChi_32) \
|
||||
VSHUF(Chi_02,T0) VSHUF(Chi_12,T1) \
|
||||
VSHUF(Chi_22,T2) VSHUF(Chi_32,T3) \
|
||||
VMADDSUBIDUP(2,%r8,T0,UChi_00) VMADDSUBIDUP(2,%r9,T1,UChi_10) \
|
||||
VMADDSUBIDUP(5,%r8,T0,UChi_01) VMADDSUBIDUP(5,%r9,T1,UChi_11) \
|
||||
VMADDSUBIDUP(8,%r8,T0,UChi_02) VMADDSUBIDUP(8,%r9,T1,UChi_12) \
|
||||
VMADDSUBIDUP(2,%r10,T2,UChi_20) VMADDSUBIDUP(2,%r11,T3,UChi_30) \
|
||||
VMADDSUBIDUP(5,%r10,T2,UChi_21) VMADDSUBIDUP(5,%r11,T3,UChi_31) \
|
||||
VMADDSUBIDUP(8,%r10,T2,UChi_22) VMADDSUBIDUP(8,%r11,T3,UChi_32) \
|
||||
VMADDSUBRDUP(2,%r8,Chi_02,UChi_00) VMADDSUBRDUP(2,%r9,Chi_12,UChi_10) \
|
||||
VMADDSUBRDUP(5,%r8,Chi_02,UChi_01) VMADDSUBRDUP(5,%r9,Chi_12,UChi_11) \
|
||||
VMADDSUBRDUP(8,%r8,Chi_02,UChi_02) VMADDSUBRDUP(8,%r9,Chi_12,UChi_12) \
|
||||
VMADDSUBRDUP(2,%r10,Chi_22,UChi_20) VMADDSUBRDUP(2,%r11,Chi_32,UChi_30) \
|
||||
VMADDSUBRDUP(5,%r10,Chi_22,UChi_21) VMADDSUBRDUP(5,%r11,Chi_32,UChi_31) \
|
||||
VMADDSUBRDUP(8,%r10,Chi_22,UChi_22) VMADDSUBRDUP(8,%r11,Chi_32,UChi_32) );
|
||||
|
||||
#define MULT_LS(g0,g1,g2,g3) \
|
||||
asm ( "movq %0, %%r8 \n\t" \
|
||||
"movq %1, %%r9 \n\t" \
|
||||
"movq %2, %%r10 \n\t" \
|
||||
"movq %3, %%r11 \n\t" : : "r"(g0), "r"(g1), "r"(g2), "r"(g3) : "%r8","%r9","%r10","%r11" );\
|
||||
asm ( \
|
||||
VSHUF(Chi_00,T0) VSHUF(Chi_10,T1) \
|
||||
VSHUF(Chi_20,T2) VSHUF(Chi_30,T3) \
|
||||
VMULIDUP(0,%r8,T0,UChi_00) VMULIDUP(0,%r9,T1,UChi_10) \
|
||||
VMULIDUP(3,%r8,T0,UChi_01) VMULIDUP(3,%r9,T1,UChi_11) \
|
||||
VMULIDUP(6,%r8,T0,UChi_02) VMULIDUP(6,%r9,T1,UChi_12) \
|
||||
VMULIDUP(0,%r10,T2,UChi_20) VMULIDUP(0,%r11,T3,UChi_30) \
|
||||
VMULIDUP(3,%r10,T2,UChi_21) VMULIDUP(3,%r11,T3,UChi_31) \
|
||||
VMULIDUP(6,%r10,T2,UChi_22) VMULIDUP(6,%r11,T3,UChi_32) \
|
||||
VMADDSUBRDUP(0,%r8,Chi_00,UChi_00) VMADDSUBRDUP(0,%r9,Chi_10,UChi_10) \
|
||||
VMADDSUBRDUP(3,%r8,Chi_00,UChi_01) VMADDSUBRDUP(3,%r9,Chi_10,UChi_11) \
|
||||
VMADDSUBRDUP(6,%r8,Chi_00,UChi_02) VMADDSUBRDUP(6,%r9,Chi_10,UChi_12) \
|
||||
VMADDSUBRDUP(0,%r10,Chi_20,UChi_20) VMADDSUBRDUP(0,%r11,Chi_30,UChi_30) \
|
||||
VMADDSUBRDUP(3,%r10,Chi_20,UChi_21) VMADDSUBRDUP(3,%r11,Chi_30,UChi_31) \
|
||||
VMADDSUBRDUP(6,%r10,Chi_20,UChi_22) VMADDSUBRDUP(6,%r11,Chi_30,UChi_32) \
|
||||
VSHUF(Chi_01,T0) VSHUF(Chi_11,T1) \
|
||||
VSHUF(Chi_21,T2) VSHUF(Chi_31,T3) \
|
||||
VMADDSUBIDUP(1,%r8,T0,UChi_00) VMADDSUBIDUP(1,%r9,T1,UChi_10) \
|
||||
VMADDSUBIDUP(4,%r8,T0,UChi_01) VMADDSUBIDUP(4,%r9,T1,UChi_11) \
|
||||
VMADDSUBIDUP(7,%r8,T0,UChi_02) VMADDSUBIDUP(7,%r9,T1,UChi_12) \
|
||||
VMADDSUBIDUP(1,%r10,T2,UChi_20) VMADDSUBIDUP(1,%r11,T3,UChi_30) \
|
||||
VMADDSUBIDUP(4,%r10,T2,UChi_21) VMADDSUBIDUP(4,%r11,T3,UChi_31) \
|
||||
VMADDSUBIDUP(7,%r10,T2,UChi_22) VMADDSUBIDUP(7,%r11,T3,UChi_32) \
|
||||
VMADDSUBRDUP(1,%r8,Chi_01,UChi_00) VMADDSUBRDUP(1,%r9,Chi_11,UChi_10) \
|
||||
VMADDSUBRDUP(4,%r8,Chi_01,UChi_01) VMADDSUBRDUP(4,%r9,Chi_11,UChi_11) \
|
||||
VMADDSUBRDUP(7,%r8,Chi_01,UChi_02) VMADDSUBRDUP(7,%r9,Chi_11,UChi_12) \
|
||||
VMADDSUBRDUP(1,%r10,Chi_21,UChi_20) VMADDSUBRDUP(1,%r11,Chi_31,UChi_30) \
|
||||
VMADDSUBRDUP(4,%r10,Chi_21,UChi_21) VMADDSUBRDUP(4,%r11,Chi_31,UChi_31) \
|
||||
VMADDSUBRDUP(7,%r10,Chi_21,UChi_22) VMADDSUBRDUP(7,%r11,Chi_31,UChi_32) \
|
||||
VSHUF(Chi_02,T0) VSHUF(Chi_12,T1) \
|
||||
VSHUF(Chi_22,T2) VSHUF(Chi_32,T3) \
|
||||
VMADDSUBIDUP(2,%r8,T0,UChi_00) VMADDSUBIDUP(2,%r9,T1,UChi_10) \
|
||||
VMADDSUBIDUP(5,%r8,T0,UChi_01) VMADDSUBIDUP(5,%r9,T1,UChi_11) \
|
||||
VMADDSUBIDUP(8,%r8,T0,UChi_02) VMADDSUBIDUP(8,%r9,T1,UChi_12) \
|
||||
VMADDSUBIDUP(2,%r10,T2,UChi_20) VMADDSUBIDUP(2,%r11,T3,UChi_30) \
|
||||
VMADDSUBIDUP(5,%r10,T2,UChi_21) VMADDSUBIDUP(5,%r11,T3,UChi_31) \
|
||||
VMADDSUBIDUP(8,%r10,T2,UChi_22) VMADDSUBIDUP(8,%r11,T3,UChi_32) \
|
||||
VMADDSUBRDUP(2,%r8,Chi_02,UChi_00) VMADDSUBRDUP(2,%r9,Chi_12,UChi_10) \
|
||||
VMADDSUBRDUP(5,%r8,Chi_02,UChi_01) VMADDSUBRDUP(5,%r9,Chi_12,UChi_11) \
|
||||
VMADDSUBRDUP(8,%r8,Chi_02,UChi_02) VMADDSUBRDUP(8,%r9,Chi_12,UChi_12) \
|
||||
VMADDSUBRDUP(2,%r10,Chi_22,UChi_20) VMADDSUBRDUP(2,%r11,Chi_32,UChi_30) \
|
||||
VMADDSUBRDUP(5,%r10,Chi_22,UChi_21) VMADDSUBRDUP(5,%r11,Chi_32,UChi_31) \
|
||||
VMADDSUBRDUP(8,%r10,Chi_22,UChi_22) VMADDSUBRDUP(8,%r11,Chi_32,UChi_32) );
|
||||
|
||||
#define MULT_ADD_XYZTa(g0,g1) \
|
||||
asm ( "movq %0, %%r8 \n\t" \
|
||||
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9");\
|
||||
__asm__ ( \
|
||||
VSHUF(Chi_00,T0) \
|
||||
VSHUF(Chi_10,T1) \
|
||||
VMOVIDUP(0,%r8,Z0 ) \
|
||||
VMOVIDUP(3,%r8,Z1 ) \
|
||||
VMOVIDUP(6,%r8,Z2 ) \
|
||||
VMADDSUB(Z0,T0,UChi_00) \
|
||||
VMADDSUB(Z1,T0,UChi_01) \
|
||||
VMADDSUB(Z2,T0,UChi_02) \
|
||||
\
|
||||
VMOVIDUP(0,%r9,Z0 ) \
|
||||
VMOVIDUP(3,%r9,Z1 ) \
|
||||
VMOVIDUP(6,%r9,Z2 ) \
|
||||
VMADDSUB(Z0,T1,UChi_10) \
|
||||
VMADDSUB(Z1,T1,UChi_11) \
|
||||
VMADDSUB(Z2,T1,UChi_12) \
|
||||
\
|
||||
\
|
||||
VMOVRDUP(0,%r8,Z3 ) \
|
||||
VMOVRDUP(3,%r8,Z4 ) \
|
||||
VMOVRDUP(6,%r8,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_00,UChi_00)/*rr * ir = ri rr*/ \
|
||||
VMADDSUB(Z4,Chi_00,UChi_01) \
|
||||
VMADDSUB(Z5,Chi_00,UChi_02) \
|
||||
\
|
||||
VMOVRDUP(0,%r9,Z3 ) \
|
||||
VMOVRDUP(3,%r9,Z4 ) \
|
||||
VMOVRDUP(6,%r9,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_10,UChi_10) \
|
||||
VMADDSUB(Z4,Chi_10,UChi_11)\
|
||||
VMADDSUB(Z5,Chi_10,UChi_12) \
|
||||
\
|
||||
\
|
||||
VMOVIDUP(1,%r8,Z0 ) \
|
||||
VMOVIDUP(4,%r8,Z1 ) \
|
||||
VMOVIDUP(7,%r8,Z2 ) \
|
||||
VSHUF(Chi_01,T0) \
|
||||
VMADDSUB(Z0,T0,UChi_00) \
|
||||
VMADDSUB(Z1,T0,UChi_01) \
|
||||
VMADDSUB(Z2,T0,UChi_02) \
|
||||
\
|
||||
VMOVIDUP(1,%r9,Z0 ) \
|
||||
VMOVIDUP(4,%r9,Z1 ) \
|
||||
VMOVIDUP(7,%r9,Z2 ) \
|
||||
VSHUF(Chi_11,T1) \
|
||||
VMADDSUB(Z0,T1,UChi_10) \
|
||||
VMADDSUB(Z1,T1,UChi_11) \
|
||||
VMADDSUB(Z2,T1,UChi_12) \
|
||||
\
|
||||
VMOVRDUP(1,%r8,Z3 ) \
|
||||
VMOVRDUP(4,%r8,Z4 ) \
|
||||
VMOVRDUP(7,%r8,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_01,UChi_00) \
|
||||
VMADDSUB(Z4,Chi_01,UChi_01) \
|
||||
VMADDSUB(Z5,Chi_01,UChi_02) \
|
||||
\
|
||||
VMOVRDUP(1,%r9,Z3 ) \
|
||||
VMOVRDUP(4,%r9,Z4 ) \
|
||||
VMOVRDUP(7,%r9,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_11,UChi_10) \
|
||||
VMADDSUB(Z4,Chi_11,UChi_11) \
|
||||
VMADDSUB(Z5,Chi_11,UChi_12) \
|
||||
\
|
||||
VSHUF(Chi_02,T0) \
|
||||
VSHUF(Chi_12,T1) \
|
||||
VMOVIDUP(2,%r8,Z0 ) \
|
||||
VMOVIDUP(5,%r8,Z1 ) \
|
||||
VMOVIDUP(8,%r8,Z2 ) \
|
||||
VMADDSUB(Z0,T0,UChi_00) \
|
||||
VMADDSUB(Z1,T0,UChi_01) \
|
||||
VMADDSUB(Z2,T0,UChi_02) \
|
||||
VMOVIDUP(2,%r9,Z0 ) \
|
||||
VMOVIDUP(5,%r9,Z1 ) \
|
||||
VMOVIDUP(8,%r9,Z2 ) \
|
||||
VMADDSUB(Z0,T1,UChi_10) \
|
||||
VMADDSUB(Z1,T1,UChi_11) \
|
||||
VMADDSUB(Z2,T1,UChi_12) \
|
||||
/*55*/ \
|
||||
VMOVRDUP(2,%r8,Z3 ) \
|
||||
VMOVRDUP(5,%r8,Z4 ) \
|
||||
VMOVRDUP(8,%r8,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_02,UChi_00) \
|
||||
VMADDSUB(Z4,Chi_02,UChi_01) \
|
||||
VMADDSUB(Z5,Chi_02,UChi_02) \
|
||||
VMOVRDUP(2,%r9,Z3 ) \
|
||||
VMOVRDUP(5,%r9,Z4 ) \
|
||||
VMOVRDUP(8,%r9,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_12,UChi_10) \
|
||||
VMADDSUB(Z4,Chi_12,UChi_11) \
|
||||
VMADDSUB(Z5,Chi_12,UChi_12) \
|
||||
/*61 insns*/ );
|
||||
|
||||
#define MULT_ADD_XYZT(g0,g1) \
|
||||
asm ( "movq %0, %%r8 \n\t" \
|
||||
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9");\
|
||||
__asm__ ( \
|
||||
VSHUFMEM(0,%r8,Z00) VSHUFMEM(0,%r9,Z10) \
|
||||
VRDUP(Chi_00,T0) VIDUP(Chi_00,Chi_00) \
|
||||
VRDUP(Chi_10,T1) VIDUP(Chi_10,Chi_10) \
|
||||
VMUL(Z00,Chi_00,Z1) VMUL(Z10,Chi_10,Z2) \
|
||||
VSHUFMEM(3,%r8,Z00) VSHUFMEM(3,%r9,Z10) \
|
||||
VMUL(Z00,Chi_00,Z3) VMUL(Z10,Chi_10,Z4) \
|
||||
VSHUFMEM(6,%r8,Z00) VSHUFMEM(6,%r9,Z10) \
|
||||
VMUL(Z00,Chi_00,Z5) VMUL(Z10,Chi_10,Z6) \
|
||||
VMADDMEM(0,%r8,T0,UChi_00) VMADDMEM(0,%r9,T1,UChi_10) \
|
||||
VMADDMEM(3,%r8,T0,UChi_01) VMADDMEM(3,%r9,T1,UChi_11) \
|
||||
VMADDMEM(6,%r8,T0,UChi_02) VMADDMEM(6,%r9,T1,UChi_12) \
|
||||
VSHUFMEM(1,%r8,Z00) VSHUFMEM(1,%r9,Z10) \
|
||||
VRDUP(Chi_01,T0) VIDUP(Chi_01,Chi_01) \
|
||||
VRDUP(Chi_11,T1) VIDUP(Chi_11,Chi_11) \
|
||||
VMADD(Z00,Chi_01,Z1) VMADD(Z10,Chi_11,Z2) \
|
||||
VSHUFMEM(4,%r8,Z00) VSHUFMEM(4,%r9,Z10) \
|
||||
VMADD(Z00,Chi_01,Z3) VMADD(Z10,Chi_11,Z4) \
|
||||
VSHUFMEM(7,%r8,Z00) VSHUFMEM(7,%r9,Z10) \
|
||||
VMADD(Z00,Chi_01,Z5) VMADD(Z10,Chi_11,Z6) \
|
||||
VMADDMEM(1,%r8,T0,UChi_00) VMADDMEM(1,%r9,T1,UChi_10) \
|
||||
VMADDMEM(4,%r8,T0,UChi_01) VMADDMEM(4,%r9,T1,UChi_11) \
|
||||
VMADDMEM(7,%r8,T0,UChi_02) VMADDMEM(7,%r9,T1,UChi_12) \
|
||||
VSHUFMEM(2,%r8,Z00) VSHUFMEM(2,%r9,Z10) \
|
||||
VRDUP(Chi_02,T0) VIDUP(Chi_02,Chi_02) \
|
||||
VRDUP(Chi_12,T1) VIDUP(Chi_12,Chi_12) \
|
||||
VMADD(Z00,Chi_02,Z1) VMADD(Z10,Chi_12,Z2) \
|
||||
VSHUFMEM(5,%r8,Z00) VSHUFMEM(5,%r9,Z10) \
|
||||
VMADD(Z00,Chi_02,Z3) VMADD(Z10,Chi_12,Z4) \
|
||||
VSHUFMEM(8,%r8,Z00) VSHUFMEM(8,%r9,Z10) \
|
||||
VMADD(Z00,Chi_02,Z5) VMADD(Z10,Chi_12,Z6) \
|
||||
VMADDSUBMEM(2,%r8,T0,Z1) VMADDSUBMEM(2,%r9,T1,Z2) \
|
||||
VMADDSUBMEM(5,%r8,T0,Z3) VMADDSUBMEM(5,%r9,T1,Z4) \
|
||||
VMADDSUBMEM(8,%r8,T0,Z5) VMADDSUBMEM(8,%r9,T1,Z6) \
|
||||
VADD(Z1,UChi_00,UChi_00) VADD(Z2,UChi_10,UChi_10) \
|
||||
VADD(Z3,UChi_01,UChi_01) VADD(Z4,UChi_11,UChi_11) \
|
||||
VADD(Z5,UChi_02,UChi_02) VADD(Z6,UChi_12,UChi_12) );
|
||||
|
||||
#define MULT_XYZT(g0,g1) \
|
||||
asm ( "movq %0, %%r8 \n\t" \
|
||||
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9" ); \
|
||||
__asm__ ( \
|
||||
VSHUF(Chi_00,T0) \
|
||||
VSHUF(Chi_10,T1) \
|
||||
VMOVIDUP(0,%r8,Z0 ) \
|
||||
VMOVIDUP(3,%r8,Z1 ) \
|
||||
VMOVIDUP(6,%r8,Z2 ) \
|
||||
/*6*/ \
|
||||
VMUL(Z0,T0,UChi_00) \
|
||||
VMUL(Z1,T0,UChi_01) \
|
||||
VMUL(Z2,T0,UChi_02) \
|
||||
VMOVIDUP(0,%r9,Z0 ) \
|
||||
VMOVIDUP(3,%r9,Z1 ) \
|
||||
VMOVIDUP(6,%r9,Z2 ) \
|
||||
VMUL(Z0,T1,UChi_10) \
|
||||
VMUL(Z1,T1,UChi_11) \
|
||||
VMUL(Z2,T1,UChi_12) \
|
||||
VMOVRDUP(0,%r8,Z3 ) \
|
||||
VMOVRDUP(3,%r8,Z4 ) \
|
||||
VMOVRDUP(6,%r8,Z5 ) \
|
||||
/*18*/ \
|
||||
VMADDSUB(Z3,Chi_00,UChi_00) \
|
||||
VMADDSUB(Z4,Chi_00,UChi_01)\
|
||||
VMADDSUB(Z5,Chi_00,UChi_02) \
|
||||
VMOVRDUP(0,%r9,Z3 ) \
|
||||
VMOVRDUP(3,%r9,Z4 ) \
|
||||
VMOVRDUP(6,%r9,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_10,UChi_10) \
|
||||
VMADDSUB(Z4,Chi_10,UChi_11)\
|
||||
VMADDSUB(Z5,Chi_10,UChi_12) \
|
||||
VMOVIDUP(1,%r8,Z0 ) \
|
||||
VMOVIDUP(4,%r8,Z1 ) \
|
||||
VMOVIDUP(7,%r8,Z2 ) \
|
||||
/*28*/ \
|
||||
VSHUF(Chi_01,T0) \
|
||||
VMADDSUB(Z0,T0,UChi_00) \
|
||||
VMADDSUB(Z1,T0,UChi_01) \
|
||||
VMADDSUB(Z2,T0,UChi_02) \
|
||||
VMOVIDUP(1,%r9,Z0 ) \
|
||||
VMOVIDUP(4,%r9,Z1 ) \
|
||||
VMOVIDUP(7,%r9,Z2 ) \
|
||||
VSHUF(Chi_11,T1) \
|
||||
VMADDSUB(Z0,T1,UChi_10) \
|
||||
VMADDSUB(Z1,T1,UChi_11) \
|
||||
VMADDSUB(Z2,T1,UChi_12) \
|
||||
VMOVRDUP(1,%r8,Z3 ) \
|
||||
VMOVRDUP(4,%r8,Z4 ) \
|
||||
VMOVRDUP(7,%r8,Z5 ) \
|
||||
/*38*/ \
|
||||
VMADDSUB(Z3,Chi_01,UChi_00) \
|
||||
VMADDSUB(Z4,Chi_01,UChi_01) \
|
||||
VMADDSUB(Z5,Chi_01,UChi_02) \
|
||||
VMOVRDUP(1,%r9,Z3 ) \
|
||||
VMOVRDUP(4,%r9,Z4 ) \
|
||||
VMOVRDUP(7,%r9,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_11,UChi_10) \
|
||||
VMADDSUB(Z4,Chi_11,UChi_11) \
|
||||
VMADDSUB(Z5,Chi_11,UChi_12) \
|
||||
/*48*/ \
|
||||
VSHUF(Chi_02,T0) \
|
||||
VSHUF(Chi_12,T1) \
|
||||
VMOVIDUP(2,%r8,Z0 ) \
|
||||
VMOVIDUP(5,%r8,Z1 ) \
|
||||
VMOVIDUP(8,%r8,Z2 ) \
|
||||
VMADDSUB(Z0,T0,UChi_00) \
|
||||
VMADDSUB(Z1,T0,UChi_01) \
|
||||
VMADDSUB(Z2,T0,UChi_02) \
|
||||
VMOVIDUP(2,%r9,Z0 ) \
|
||||
VMOVIDUP(5,%r9,Z1 ) \
|
||||
VMOVIDUP(8,%r9,Z2 ) \
|
||||
VMADDSUB(Z0,T1,UChi_10) \
|
||||
VMADDSUB(Z1,T1,UChi_11) \
|
||||
VMADDSUB(Z2,T1,UChi_12) \
|
||||
/*55*/ \
|
||||
VMOVRDUP(2,%r8,Z3 ) \
|
||||
VMOVRDUP(5,%r8,Z4 ) \
|
||||
VMOVRDUP(8,%r8,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_02,UChi_00) \
|
||||
VMADDSUB(Z4,Chi_02,UChi_01) \
|
||||
VMADDSUB(Z5,Chi_02,UChi_02) \
|
||||
VMOVRDUP(2,%r9,Z3 ) \
|
||||
VMOVRDUP(5,%r9,Z4 ) \
|
||||
VMOVRDUP(8,%r9,Z5 ) \
|
||||
VMADDSUB(Z3,Chi_12,UChi_10) \
|
||||
VMADDSUB(Z4,Chi_12,UChi_11) \
|
||||
VMADDSUB(Z5,Chi_12,UChi_12) \
|
||||
/*61 insns*/ );
|
||||
|
||||
#define MULT_XYZTa(g0,g1) \
|
||||
asm ( "movq %0, %%r8 \n\t" \
|
||||
"movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9" ); \
|
||||
__asm__ ( \
|
||||
VSHUFMEM(0,%r8,Z00) VSHUFMEM(0,%r9,Z10) \
|
||||
VRDUP(Chi_00,T0) VIDUP(Chi_00,Chi_00) \
|
||||
VRDUP(Chi_10,T1) VIDUP(Chi_10,Chi_10) \
|
||||
VMUL(Z00,Chi_00,Z1) VMUL(Z10,Chi_10,Z2) \
|
||||
VSHUFMEM(3,%r8,Z00) VSHUFMEM(3,%r9,Z10) \
|
||||
VMUL(Z00,Chi_00,Z3) VMUL(Z10,Chi_10,Z4) \
|
||||
VSHUFMEM(6,%r8,Z00) VSHUFMEM(6,%r9,Z10) \
|
||||
VMUL(Z00,Chi_00,Z5) VMUL(Z10,Chi_10,Z6) \
|
||||
VMULMEM(0,%r8,T0,UChi_00) VMULMEM(0,%r9,T1,UChi_10) \
|
||||
VMULMEM(3,%r8,T0,UChi_01) VMULMEM(3,%r9,T1,UChi_11) \
|
||||
VMULMEM(6,%r8,T0,UChi_02) VMULMEM(6,%r9,T1,UChi_12) \
|
||||
VSHUFMEM(1,%r8,Z00) VSHUFMEM(1,%r9,Z10) \
|
||||
VRDUP(Chi_01,T0) VIDUP(Chi_01,Chi_01) \
|
||||
VRDUP(Chi_11,T1) VIDUP(Chi_11,Chi_11) \
|
||||
VMADD(Z00,Chi_01,Z1) VMADD(Z10,Chi_11,Z2) \
|
||||
VSHUFMEM(4,%r8,Z00) VSHUFMEM(4,%r9,Z10) \
|
||||
VMADD(Z00,Chi_01,Z3) VMADD(Z10,Chi_11,Z4) \
|
||||
VSHUFMEM(7,%r8,Z00) VSHUFMEM(7,%r9,Z10) \
|
||||
VMADD(Z00,Chi_01,Z5) VMADD(Z10,Chi_11,Z6) \
|
||||
VMADDMEM(1,%r8,T0,UChi_00) VMADDMEM(1,%r9,T1,UChi_10) \
|
||||
VMADDMEM(4,%r8,T0,UChi_01) VMADDMEM(4,%r9,T1,UChi_11) \
|
||||
VMADDMEM(7,%r8,T0,UChi_02) VMADDMEM(7,%r9,T1,UChi_12) \
|
||||
VSHUFMEM(2,%r8,Z00) VSHUFMEM(2,%r9,Z10) \
|
||||
VRDUP(Chi_02,T0) VIDUP(Chi_02,Chi_02) \
|
||||
VRDUP(Chi_12,T1) VIDUP(Chi_12,Chi_12) \
|
||||
VMADD(Z00,Chi_02,Z1) VMADD(Z10,Chi_12,Z2) \
|
||||
VSHUFMEM(5,%r8,Z00) VSHUFMEM(5,%r9,Z10) \
|
||||
VMADD(Z00,Chi_02,Z3) VMADD(Z10,Chi_12,Z4) \
|
||||
VSHUFMEM(8,%r8,Z00) VSHUFMEM(8,%r9,Z10) \
|
||||
VMADD(Z00,Chi_02,Z5) VMADD(Z10,Chi_12,Z6) \
|
||||
VMADDSUBMEM(2,%r8,T0,Z1) VMADDSUBMEM(2,%r9,T1,Z2) \
|
||||
VMADDSUBMEM(5,%r8,T0,Z3) VMADDSUBMEM(5,%r9,T1,Z4) \
|
||||
VMADDSUBMEM(8,%r8,T0,Z5) VMADDSUBMEM(8,%r9,T1,Z6) \
|
||||
VADD(Z1,UChi_00,UChi_00) VADD(Z2,UChi_10,UChi_10) \
|
||||
VADD(Z3,UChi_01,UChi_01) VADD(Z4,UChi_11,UChi_11) \
|
||||
VADD(Z5,UChi_02,UChi_02) VADD(Z6,UChi_12,UChi_12) );
|
||||
|
||||
|
||||
#define LOAD_CHI(a0,a1,a2,a3) \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VLOAD(0,%%r8,pChi_00) \
|
||||
VLOAD(1,%%r8,pChi_01) \
|
||||
VLOAD(2,%%r8,pChi_02) \
|
||||
: : "r" (a0) : "%r8" ); \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VLOAD(0,%%r8,pChi_10) \
|
||||
VLOAD(1,%%r8,pChi_11) \
|
||||
VLOAD(2,%%r8,pChi_12) \
|
||||
: : "r" (a1) : "%r8" ); \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VLOAD(0,%%r8,pChi_20) \
|
||||
VLOAD(1,%%r8,pChi_21) \
|
||||
VLOAD(2,%%r8,pChi_22) \
|
||||
: : "r" (a2) : "%r8" ); \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VLOAD(0,%%r8,pChi_30) \
|
||||
VLOAD(1,%%r8,pChi_31) \
|
||||
VLOAD(2,%%r8,pChi_32) \
|
||||
: : "r" (a3) : "%r8" );
|
||||
|
||||
#define LOAD_CHIa(a0,a1) \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VLOAD(0,%%r8,pChi_00) \
|
||||
VLOAD(1,%%r8,pChi_01) \
|
||||
VLOAD(2,%%r8,pChi_02) \
|
||||
: : "r" (a0) : "%r8" ); \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VLOAD(0,%%r8,pChi_10) \
|
||||
VLOAD(1,%%r8,pChi_11) \
|
||||
VLOAD(2,%%r8,pChi_12) \
|
||||
: : "r" (a1) : "%r8" );
|
||||
|
||||
#define PF_CHI(a0)
|
||||
#define PF_CHIa(a0) \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VPREFETCH1(0,%%r8) \
|
||||
VPREFETCH1(1,%%r8) \
|
||||
VPREFETCH1(2,%%r8) \
|
||||
: : "r" (a0) : "%r8" ); \
|
||||
|
||||
#define PF_GAUGE_XYZT(a0)
|
||||
#define PF_GAUGE_XYZTa(a0) \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VPREFETCH1(0,%%r8) \
|
||||
VPREFETCH1(1,%%r8) \
|
||||
VPREFETCH1(2,%%r8) \
|
||||
VPREFETCH1(3,%%r8) \
|
||||
VPREFETCH1(4,%%r8) \
|
||||
VPREFETCH1(5,%%r8) \
|
||||
VPREFETCH1(6,%%r8) \
|
||||
VPREFETCH1(7,%%r8) \
|
||||
VPREFETCH1(8,%%r8) \
|
||||
: : "r" (a0) : "%r8" ); \
|
||||
|
||||
#define PF_GAUGE_LS(a0)
|
||||
#define PF_GAUGE_LSa(a0) \
|
||||
asm ( \
|
||||
"movq %0, %%r8 \n\t" \
|
||||
VPREFETCH1(0,%%r8) \
|
||||
VPREFETCH1(1,%%r8) \
|
||||
: : "r" (a0) : "%r8" ); \
|
||||
|
||||
|
||||
#define REDUCE(out) \
|
||||
asm ( \
|
||||
VADD(UChi_00,UChi_10,UChi_00) \
|
||||
VADD(UChi_01,UChi_11,UChi_01) \
|
||||
VADD(UChi_02,UChi_12,UChi_02) \
|
||||
VADD(UChi_30,UChi_20,UChi_30) \
|
||||
VADD(UChi_31,UChi_21,UChi_31) \
|
||||
VADD(UChi_32,UChi_22,UChi_32) \
|
||||
VADD(UChi_00,UChi_30,UChi_00) \
|
||||
VADD(UChi_01,UChi_31,UChi_01) \
|
||||
VADD(UChi_02,UChi_32,UChi_02) ); \
|
||||
asm ( \
|
||||
VSTORE(0,%0,pUChi_00) \
|
||||
VSTORE(1,%0,pUChi_01) \
|
||||
VSTORE(2,%0,pUChi_02) \
|
||||
: : "r" (out) : "memory" );
|
||||
|
||||
#define REDUCEa(out) \
|
||||
asm ( \
|
||||
VADD(UChi_00,UChi_10,UChi_00) \
|
||||
VADD(UChi_01,UChi_11,UChi_01) \
|
||||
VADD(UChi_02,UChi_12,UChi_02) ); \
|
||||
asm ( \
|
||||
VSTORE(0,%0,pUChi_00) \
|
||||
VSTORE(1,%0,pUChi_01) \
|
||||
VSTORE(2,%0,pUChi_02) \
|
||||
: : "r" (out) : "memory" );
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_0,Chi_0);\
|
||||
permute##dir(Chi_1,Chi_1);\
|
||||
permute##dir(Chi_2,Chi_2);
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
|
||||
};
|
||||
|
||||
|
||||
//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in._odata[o] ; } else { out =(uint64_t) &buf[o]; }
|
||||
|
||||
#define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? in_p : buf; out = (uint64_t) &ptr[o]; }
|
||||
|
||||
#define PREPARE_XYZT(X,Y,Z,T,skew,UU) \
|
||||
PREPARE(X,Y,Z,T,skew,UU); \
|
||||
PF_GAUGE_XYZT(gauge0); \
|
||||
PF_GAUGE_XYZT(gauge1); \
|
||||
PF_GAUGE_XYZT(gauge2); \
|
||||
PF_GAUGE_XYZT(gauge3);
|
||||
|
||||
#define PREPARE_LS(X,Y,Z,T,skew,UU) \
|
||||
PREPARE(X,Y,Z,T,skew,UU); \
|
||||
PF_GAUGE_LS(gauge0); \
|
||||
PF_GAUGE_LS(gauge1); \
|
||||
PF_GAUGE_LS(gauge2); \
|
||||
PF_GAUGE_LS(gauge3);
|
||||
|
||||
#define PREPARE(X,Y,Z,T,skew,UU) \
|
||||
SE0=st.GetEntry(ptype,X+skew,sF); \
|
||||
o0 = SE0->_offset; \
|
||||
l0 = SE0->_is_local; \
|
||||
p0 = SE0->_permute; \
|
||||
CONDITIONAL_MOVE(l0,o0,addr0); \
|
||||
PF_CHI(addr0); \
|
||||
\
|
||||
SE1=st.GetEntry(ptype,Y+skew,sF); \
|
||||
o1 = SE1->_offset; \
|
||||
l1 = SE1->_is_local; \
|
||||
p1 = SE1->_permute; \
|
||||
CONDITIONAL_MOVE(l1,o1,addr1); \
|
||||
PF_CHI(addr1); \
|
||||
\
|
||||
SE2=st.GetEntry(ptype,Z+skew,sF); \
|
||||
o2 = SE2->_offset; \
|
||||
l2 = SE2->_is_local; \
|
||||
p2 = SE2->_permute; \
|
||||
CONDITIONAL_MOVE(l2,o2,addr2); \
|
||||
PF_CHI(addr2); \
|
||||
\
|
||||
SE3=st.GetEntry(ptype,T+skew,sF); \
|
||||
o3 = SE3->_offset; \
|
||||
l3 = SE3->_is_local; \
|
||||
p3 = SE3->_permute; \
|
||||
CONDITIONAL_MOVE(l3,o3,addr3); \
|
||||
PF_CHI(addr3); \
|
||||
\
|
||||
gauge0 =(uint64_t)&UU._odata[sU]( X ); \
|
||||
gauge1 =(uint64_t)&UU._odata[sU]( Y ); \
|
||||
gauge2 =(uint64_t)&UU._odata[sU]( Z ); \
|
||||
gauge3 =(uint64_t)&UU._odata[sU]( T );
|
||||
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
#include <simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
uint64_t addr0,addr1,addr2,addr3;
|
||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
||||
|
||||
int o0,o1,o2,o3; // offsets
|
||||
int l0,l1,l2,l3; // local
|
||||
int p0,p1,p2,p3; // perm
|
||||
int ptype;
|
||||
StencilEntry *SE0;
|
||||
StencilEntry *SE1;
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
|
||||
int sF=s+LLs*sU;
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCE(addr0);
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
uint64_t addr0,addr1,addr2,addr3;
|
||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
||||
|
||||
int o0,o1,o2,o3; // offsets
|
||||
int l0,l1,l2,l3; // local
|
||||
int p0,p1,p2,p3; // perm
|
||||
int ptype;
|
||||
StencilEntry *SE0;
|
||||
StencilEntry *SE1;
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCE(addr0);
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#define PERMUTE_DIR3 __asm__ ( \
|
||||
VPERM3(Chi_00,Chi_00) \
|
||||
VPERM3(Chi_01,Chi_01) \
|
||||
VPERM3(Chi_02,Chi_02) );
|
||||
|
||||
#define PERMUTE_DIR2 __asm__ ( \
|
||||
VPERM2(Chi_10,Chi_10) \
|
||||
VPERM2(Chi_11,Chi_11) \
|
||||
VPERM2(Chi_12,Chi_12) );
|
||||
|
||||
#define PERMUTE_DIR1 __asm__ ( \
|
||||
VPERM1(Chi_00,Chi_00) \
|
||||
VPERM1(Chi_01,Chi_01) \
|
||||
VPERM1(Chi_02,Chi_02) );
|
||||
|
||||
#define PERMUTE_DIR0 __asm__ ( \
|
||||
VPERM0(Chi_10,Chi_10) \
|
||||
VPERM0(Chi_11,Chi_11) \
|
||||
VPERM0(Chi_12,Chi_12) );
|
||||
|
||||
#define PERMUTE01 \
|
||||
if ( p0 ) { PERMUTE_DIR3; }\
|
||||
if ( p1 ) { PERMUTE_DIR2; }
|
||||
|
||||
#define PERMUTE23 \
|
||||
if ( p2 ) { PERMUTE_DIR1; }\
|
||||
if ( p3 ) { PERMUTE_DIR0; }
|
||||
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
|
||||
#include <simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
uint64_t addr0,addr1,addr2,addr3;
|
||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
||||
|
||||
int o0,o1,o2,o3; // offsets
|
||||
int l0,l1,l2,l3; // local
|
||||
int p0,p1,p2,p3; // perm
|
||||
int ptype;
|
||||
StencilEntry *SE0;
|
||||
StencilEntry *SE1;
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
|
||||
int sF=s+LLs*sU;
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_ADD_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_ADD_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_ADD_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCEa(addr0);
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
uint64_t addr0,addr1,addr2,addr3;
|
||||
const SiteSpinor *in_p; in_p = &in._odata[0];
|
||||
|
||||
int o0,o1,o2,o3; // offsets
|
||||
int l0,l1,l2,l3; // local
|
||||
int p0,p1,p2,p3; // perm
|
||||
int ptype;
|
||||
StencilEntry *SE0;
|
||||
StencilEntry *SE1;
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
|
||||
int sF=s+LLs*sU;
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,0,U);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_ADD_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
PREPARE(Xp,Yp,Zp,Tp,8,UUU);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_ADD_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
PREPARE(Xm,Ym,Zm,Tm,8,UUU);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
PERMUTE01;
|
||||
MULT_ADD_XYZT(gauge0,gauge1);
|
||||
LOAD_CHIa(addr2,addr3);
|
||||
PERMUTE23;
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCEa(addr0);
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
|
||||
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
|
||||
|
||||
}}
|
||||
|
305
lib/qcd/action/fermion/StaggeredKernelsHand.cc
Normal file
@ -0,0 +1,305 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/StaggeredKernelsHand.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHI(b) \
|
||||
const SiteSpinor & ref (b[offset]); \
|
||||
Chi_0=ref()()(0);\
|
||||
Chi_1=ref()()(1);\
|
||||
Chi_2=ref()()(2);
|
||||
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT(A,UChi) \
|
||||
auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
Impl::loadLinkElement(U_02,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_12,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_22,ref()(2,2)); \
|
||||
UChi ## _0 = U_00*Chi_0; \
|
||||
UChi ## _1 = U_10*Chi_0;\
|
||||
UChi ## _2 = U_20*Chi_0;\
|
||||
UChi ## _0 += U_01*Chi_1;\
|
||||
UChi ## _1 += U_11*Chi_1;\
|
||||
UChi ## _2 += U_21*Chi_1;\
|
||||
UChi ## _0 += U_02*Chi_2;\
|
||||
UChi ## _1 += U_12*Chi_2;\
|
||||
UChi ## _2 += U_22*Chi_2;
|
||||
|
||||
#define MULT_ADD(A,UChi) \
|
||||
auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
Impl::loadLinkElement(U_02,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_12,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_22,ref()(2,2)); \
|
||||
UChi ## _0 += U_00*Chi_0; \
|
||||
UChi ## _1 += U_10*Chi_0;\
|
||||
UChi ## _2 += U_20*Chi_0;\
|
||||
UChi ## _0 += U_01*Chi_1;\
|
||||
UChi ## _1 += U_11*Chi_1;\
|
||||
UChi ## _2 += U_21*Chi_1;\
|
||||
UChi ## _0 += U_02*Chi_2;\
|
||||
UChi ## _1 += U_12*Chi_2;\
|
||||
UChi ## _2 += U_22*Chi_2;
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_0,Chi_0);\
|
||||
permute##dir(Chi_1,Chi_1);\
|
||||
permute##dir(Chi_2,Chi_2);
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
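// "naive" accumulates the one-link hopping term built from U, while "naik"
// accumulates the three-link (Naik) term built from the long links UUU;
// the improved staggered Dhop is their scaled sum.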
|
||||
int oneLink =0;
|
||||
int threeLink=1;
|
||||
int skew(0);
|
||||
Real scale(1.0);
|
||||
|
||||
if(dag) scale = -1.0;
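// The massless staggered hopping term is anti-Hermitian, so the daggered
// operator is obtained from the same kernel by flipping the overall sign.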
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
DhopSiteDepthHand(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||
DhopSiteDepthHand(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||
out._odata[sF] =scale*(naive+naik);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
REGISTER Simd even_0; // 12 regs on knc
|
||||
REGISTER Simd even_1;
|
||||
REGISTER Simd even_2;
|
||||
REGISTER Simd odd_0; // 12 regs on knc
|
||||
REGISTER Simd odd_1;
|
||||
REGISTER Simd odd_2;
|
||||
|
||||
REGISTER Simd Chi_0; // two spinor; 6 regs
|
||||
REGISTER Simd Chi_1;
|
||||
REGISTER Simd Chi_2;
|
||||
|
||||
REGISTER Simd U_00; // two rows of U matrix
|
||||
REGISTER Simd U_10;
|
||||
REGISTER Simd U_20;
|
||||
REGISTER Simd U_01;
|
||||
REGISTER Simd U_11;
|
||||
REGISTER Simd U_21; // 2 reg left.
|
||||
REGISTER Simd U_02;
|
||||
REGISTER Simd U_12;
|
||||
REGISTER Simd U_22;
|
||||
|
||||
int skew = 0;
|
||||
if (threeLink) skew=8;
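// The stencil holds the one-link neighbours first and the three-link (Naik)
// neighbours in the following eight entries; skew selects which set is used.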
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
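// The eight directions below alternate between the even_* and odd_* register
// sets to expose more instruction-level parallelism; the two partial sums are
// combined in the vstream stores at the end.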
|
||||
|
||||
// Xp
|
||||
SE=st.GetEntry(ptype,Xp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT(Xp,even);
|
||||
}
|
||||
|
||||
// Yp
|
||||
SE=st.GetEntry(ptype,Yp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT(Yp,odd);
|
||||
}
|
||||
|
||||
|
||||
// Zp
|
||||
SE=st.GetEntry(ptype,Zp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Zp,even);
|
||||
}
|
||||
|
||||
// Tp
|
||||
SE=st.GetEntry(ptype,Tp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Tp,odd);
|
||||
}
|
||||
|
||||
// Xm
|
||||
SE=st.GetEntry(ptype,Xm+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Xm,even);
|
||||
}
|
||||
|
||||
|
||||
// Ym
|
||||
SE=st.GetEntry(ptype,Ym+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Ym,odd);
|
||||
}
|
||||
|
||||
// Zm
|
||||
SE=st.GetEntry(ptype,Zm+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Zm,even);
|
||||
}
|
||||
|
||||
// Tm
|
||||
SE=st.GetEntry(ptype,Tm+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, X==3; expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Tm,odd);
|
||||
}
|
||||
|
||||
vstream(out()()(0),even_0+odd_0);
|
||||
vstream(out()()(1),even_1+odd_1);
|
||||
vstream(out()()(2),even_2+odd_2);
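// The even/odd partial sums are combined and written out with vstream, which
// maps to a streaming (non-temporal) store on architectures that support it.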
|
||||
|
||||
}
|
||||
|
||||
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
|
||||
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
|
||||
|
||||
}}
|
@ -34,10 +34,9 @@ directory
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2,
|
||||
3});
|
||||
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1,
|
||||
-1, -1});
|
||||
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
|
||||
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
|
||||
|
||||
int WilsonFermionStatic::HandOptDslash;
|
||||
|
||||
/////////////////////////////////
|
||||
@ -224,8 +223,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
////////////////////////
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < B._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu,
|
||||
gamma);
|
||||
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
@ -335,8 +333,7 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out,
|
||||
dirdisp, gamma);
|
||||
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
|
||||
}
|
||||
};
|
||||
|
||||
@ -353,14 +350,12 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
if (dag == DaggerYes) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
|
||||
out);
|
||||
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
|
||||
out);
|
||||
Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -63,71 +63,55 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
LebesgueEvenOdd(_FourDimRedBlackGrid),
|
||||
_tmp(&FiveDimRedBlackGrid)
|
||||
{
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
assert(FourDimRedBlackGrid._ndimension==4);
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
||||
|
||||
// extent of fifth dim and not spread out
|
||||
Ls=FiveDimGrid._fdimensions[0];
|
||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||
assert(FiveDimGrid._processors[0] ==1);
|
||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<4;d++){
|
||||
|
||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
||||
|
||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
||||
|
||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
||||
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
||||
}
|
||||
|
||||
if (Impl::LsVectorised) {
|
||||
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
Ls=FiveDimGrid._fdimensions[0];
|
||||
assert(FiveDimGrid._processors[0] ==1);
|
||||
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
||||
|
||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<4;d++){
|
||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
|
||||
assert(FourDimGrid._simd_layout[d]==1);
|
||||
assert(FourDimRedBlackGrid._simd_layout[d]==1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||
|
||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FourDimRedBlackGrid._ndimension==4);
|
||||
assert(FiveDimRedBlackGrid._checker_dim==1);
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
Ls=FiveDimGrid._fdimensions[0];
|
||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
||||
assert(FiveDimGrid._processors[0] ==1);
|
||||
assert(FiveDimGrid._simd_layout[0] ==1);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<4;d++){
|
||||
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||
|
||||
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
|
||||
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
||||
|
||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Allocate the required comms buffer
|
||||
|
@ -63,8 +63,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
|
||||
Phi(Op.FermionGrid()){};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Push the gauge field in to the dops. Assume any BC's and smearing already
|
||||
// applied
|
||||
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) {
|
||||
// P(phi) = e^{- phi^dag (MdagM)^-1 phi}
|
||||
@ -107,8 +106,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
|
||||
MdagMOp.Op(X, Y);
|
||||
|
||||
RealD action = norm2(Y);
|
||||
std::cout << GridLogMessage << "Pseudofermion action " << action
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "Pseudofermion action " << action << std::endl;
|
||||
return action;
|
||||
};
|
||||
|
||||
@ -119,6 +117,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
|
||||
//
|
||||
// = - Ydag dM X - Xdag dMdag Y
|
||||
//
|
||||
//
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
|
||||
FermOp.ImportGauge(U);
|
||||
@ -133,8 +132,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
|
||||
DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi
|
||||
MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi
|
||||
|
||||
// Our conventions really make this UdSdU; We do not differentiate wrt Udag
|
||||
// here.
|
||||
// Our conventions really make this UdSdU; We do not differentiate wrt Udag here.
|
||||
// So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt.
|
||||
|
||||
FermOp.MDeriv(tmp, Y, X, DaggerNo);
|
||||
|
@ -91,7 +91,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG sRNG4(sUGrid); sRNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG sRNG5(sFGrid); sRNG5.SeedFixedIntegers(seeds5);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
|
@ -64,7 +64,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -70,7 +70,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -72,7 +72,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid); tmp=zero;
|
||||
LatticeFermion err(FGrid); tmp=zero;
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
// Only one non-zero (y)
|
||||
|
@ -81,7 +81,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
// Only one non-zero (y)
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
FermionField ref(&Grid); ref=zero;
|
||||
FermionField tmp(&Grid); tmp=zero;
|
||||
FermionField err(&Grid); tmp=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
|
tests/core/Test_staggered.cc (new file, 291 lines)
@@ -0,0 +1,291 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/core/Test_staggered.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
// pRNG.SeedRandomDevice();
|
||||
|
||||
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
|
||||
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
|
||||
typename ImprovedStaggeredFermionR::ImplParams params;
|
||||
|
||||
FermionField src (&Grid); random(pRNG,src);
|
||||
FermionField result(&Grid); result=zero;
|
||||
FermionField ref(&Grid); ref=zero;
|
||||
FermionField tmp(&Grid); tmp=zero;
|
||||
FermionField err(&Grid); err=zero;
|
||||
FermionField phi (&Grid); random(pRNG,phi);
|
||||
FermionField chi (&Grid); random(pRNG,chi);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
|
||||
double volume=1;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
|
||||
// Only one non-zero (y)
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
/* Debug force unit
|
||||
U[mu] = 1.0;
|
||||
PokeIndex<LorentzIndex>(Umu,U[mu],mu);
|
||||
*/
|
||||
}
|
||||
|
||||
ref = zero;
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD c1=9.0/8.0;
|
||||
RealD c2=-1.0/24.0;
|
||||
RealD u0=1.0;
|
||||
|
||||
{ // Simple improved staggered implementation
|
||||
ref = zero;
|
||||
RealD c1tad = 0.5*c1/u0;
|
||||
RealD c2tad = 0.5*c2/u0/u0/u0;
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(&Grid);
|
||||
|
||||
Lattice<iScalar<vInteger> > x(&Grid); LatticeCoordinate(x,0);
|
||||
Lattice<iScalar<vInteger> > y(&Grid); LatticeCoordinate(y,1);
|
||||
Lattice<iScalar<vInteger> > z(&Grid); LatticeCoordinate(z,2);
|
||||
Lattice<iScalar<vInteger> > t(&Grid); LatticeCoordinate(t,3);
|
||||
|
||||
Lattice<iScalar<vInteger> > lin_z(&Grid); lin_z=x+y;
|
||||
Lattice<iScalar<vInteger> > lin_t(&Grid); lin_t=x+y+z;
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// Staggered Phase.
|
||||
ComplexField phases(&Grid); phases=1.0;
|
||||
|
||||
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
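// These are the usual Kogut-Susskind phases: eta_0=1, eta_1=(-1)^x,
// eta_2=(-1)^(x+y), eta_3=(-1)^(x+y+z).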
|
||||
|
||||
tmp = PeriodicBC::CovShiftForward(U[mu],mu,src);
|
||||
ref = ref +c1tad*tmp*phases; // Forward 1 hop
|
||||
|
||||
tmp = PeriodicBC::CovShiftForward(U[mu],mu,tmp); // 2 hop
|
||||
tmp = PeriodicBC::CovShiftForward(U[mu],mu,tmp); // 3 hop
|
||||
ref = ref +c2tad*tmp*phases; // Forward 3 hop
|
||||
|
||||
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,src);
|
||||
ref = ref -c1tad*tmp*phases; // Backward 1 hop
|
||||
|
||||
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,tmp);
|
||||
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,tmp);
|
||||
ref = ref -c2tad*tmp*phases; // Backward 3 hop
|
||||
}
|
||||
// ref = ref + mass * src;
|
||||
}
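// The commented-out mass term is intentional: Dhop applies only the hopping
// terms, so the cshift reference must omit the mass as well.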
|
||||
|
||||
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
|
||||
int ncall=1000;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Ds.Dhop(src,result,0);
|
||||
}
|
||||
double t1=usecond();
|
||||
double t2;
|
||||
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // = 16*66 + 90 = 1146 flops per site per call
|
||||
|
||||
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
|
||||
FermionField src_e (&RBGrid);
|
||||
FermionField src_o (&RBGrid);
|
||||
FermionField r_e (&RBGrid);
|
||||
FermionField r_o (&RBGrid);
|
||||
FermionField r_eo (&Grid);
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
Ds.Meooe(src_e,r_o); std::cout<<GridLogMessage<<"Applied Meo"<<std::endl;
|
||||
Ds.Meooe(src_o,r_e); std::cout<<GridLogMessage<<"Applied Moe"<<std::endl;
|
||||
Ds.Dhop (src,ref,DaggerNo);
|
||||
|
||||
setCheckerboard(r_eo,r_o);
|
||||
setCheckerboard(r_eo,r_e);
|
||||
|
||||
err= ref - r_eo;
|
||||
std::cout<<GridLogMessage << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
FermionField chi_e (&RBGrid);
|
||||
FermionField chi_o (&RBGrid);
|
||||
|
||||
FermionField dchi_e (&RBGrid);
|
||||
FermionField dchi_o (&RBGrid);
|
||||
|
||||
FermionField phi_e (&RBGrid);
|
||||
FermionField phi_o (&RBGrid);
|
||||
|
||||
FermionField dphi_e (&RBGrid);
|
||||
FermionField dphi_o (&RBGrid);
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
Ds.Meooe(chi_e,dchi_o);
|
||||
Ds.Meooe(chi_o,dchi_e);
|
||||
Ds.MeooeDag(phi_e,dphi_o);
|
||||
Ds.MeooeDag(phi_o,dphi_e);
|
||||
|
||||
ComplexD pDce = innerProduct(phi_e,dchi_e);
|
||||
ComplexD pDco = innerProduct(phi_o,dchi_o);
|
||||
ComplexD cDpe = innerProduct(chi_e,dphi_e);
|
||||
ComplexD cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MeeInv Mee = 1 "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Ds.Mooee(chi_e,src_e);
|
||||
Ds.MooeeInv(src_e,phi_e);
|
||||
|
||||
Ds.Mooee(chi_o,src_o);
|
||||
Ds.MooeeInv(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Ds.MooeeDag(chi_e,src_e);
|
||||
Ds.MooeeInvDag(src_e,phi_e);
|
||||
|
||||
Ds.MooeeDag(chi_o,src_o);
|
||||
Ds.MooeeInvDag(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MpcDagMpc is Hermitian "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
random(pRNG,phi);
|
||||
random(pRNG,chi);
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
SchurDiagMooeeOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds);
|
||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
||||
|
||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
||||
|
||||
pDce = innerProduct(phi_e,dchi_e);
|
||||
pDco = innerProduct(phi_o,dchi_o);
|
||||
cDpe = innerProduct(chi_e,dphi_e);
|
||||
cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
tests/core/Test_staggered5D.cc (new file, 309 lines)
@@ -0,0 +1,309 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/core/Test_staggered5D.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
||||
|
||||
const int Ls=16;
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG4(UGrid);
|
||||
GridParallelRNG pRNG5(FGrid);
|
||||
pRNG4.SeedFixedIntegers(seeds);
|
||||
pRNG5.SeedFixedIntegers(seeds);
|
||||
|
||||
typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
|
||||
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
|
||||
typename ImprovedStaggeredFermion5DR::ImplParams params;
|
||||
|
||||
FermionField src (FGrid);
|
||||
|
||||
random(pRNG5,src);
|
||||
|
||||
FermionField result(FGrid); result=zero;
|
||||
FermionField ref(FGrid); ref=zero;
|
||||
FermionField tmp(FGrid); tmp=zero;
|
||||
FermionField err(FGrid); err=zero;
|
||||
FermionField phi (FGrid); random(pRNG5,phi);
|
||||
FermionField chi (FGrid); random(pRNG5,chi);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::ColdConfiguration(pRNG4,Umu);
|
||||
LatticeGaugeField Umua(UGrid); Umua=Umu;
|
||||
|
||||
double volume=Ls;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Naive implementation needs to
|
||||
// replicate across fifth dimension
|
||||
////////////////////////////////////
|
||||
LatticeGaugeField Umu5d(FGrid);
|
||||
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
|
||||
}
|
||||
}
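// The 4d gauge field is replicated on every s-slice because the 5d staggered
// operator applies the same 4d links independently on each fifth-dimension slice.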
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD c1=9.0/8.0;
|
||||
RealD c2=-1.0/24.0;
|
||||
RealD u0=1.0;
|
||||
|
||||
{ // Simple improved staggered implementation
|
||||
ref = zero;
|
||||
RealD c1tad = 0.5*c1/u0;
|
||||
RealD c2tad = 0.5*c2/u0/u0/u0;
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(FGrid);
|
||||
|
||||
Lattice<iScalar<vInteger> > x(FGrid); LatticeCoordinate(x,1); // s innermost
|
||||
Lattice<iScalar<vInteger> > y(FGrid); LatticeCoordinate(y,2);
|
||||
Lattice<iScalar<vInteger> > z(FGrid); LatticeCoordinate(z,3);
|
||||
Lattice<iScalar<vInteger> > t(FGrid); LatticeCoordinate(t,4);
|
||||
|
||||
Lattice<iScalar<vInteger> > lin_z(FGrid); lin_z=x+y;
|
||||
Lattice<iScalar<vInteger> > lin_t(FGrid); lin_t=x+y+z;
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// Staggered Phase.
|
||||
ComplexField phases(FGrid); phases=1.0;
|
||||
|
||||
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
|
||||
|
||||
tmp = PeriodicBC::CovShiftForward(U[mu],mu+1,src);
|
||||
ref = ref +c1tad*tmp*phases; // Forward 1 hop
|
||||
|
||||
tmp = PeriodicBC::CovShiftForward(U[mu],mu+1,tmp); // 2 hop
|
||||
tmp = PeriodicBC::CovShiftForward(U[mu],mu+1,tmp); // 3 hop
|
||||
ref = ref +c2tad*tmp*phases; // Forward 3 hop
|
||||
|
||||
tmp = PeriodicBC::CovShiftBackward(U[mu],mu+1,src);
|
||||
ref = ref -c1tad*tmp*phases; // Backward 1 hop
|
||||
|
||||
tmp = PeriodicBC::CovShiftBackward(U[mu],mu+1,tmp);
|
||||
tmp = PeriodicBC::CovShiftBackward(U[mu],mu+1,tmp);
|
||||
ref = ref -c2tad*tmp*phases; // Backward 3 hop
|
||||
}
|
||||
}
|
||||
|
||||
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0,params);
|
||||
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
|
||||
int ncall=100000;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Ds.Dhop(src,result,0);
|
||||
}
|
||||
double t1=usecond();
|
||||
|
||||
double t2;
|
||||
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // = 16*66 + 90 = 1146 flops per site per call
|
||||
|
||||
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||
// std::cout<<GridLogMessage << "result"<< result <<std::endl;
|
||||
// std::cout<<GridLogMessage << "ref "<< ref <<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
|
||||
FermionField src_e (FrbGrid);
|
||||
FermionField src_o (FrbGrid);
|
||||
FermionField r_e (FrbGrid);
|
||||
FermionField r_o (FrbGrid);
|
||||
FermionField r_eo (FGrid);
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
Ds.Meooe(src_e,r_o); std::cout<<GridLogMessage<<"Applied Meo"<<std::endl;
|
||||
Ds.Meooe(src_o,r_e); std::cout<<GridLogMessage<<"Applied Moe"<<std::endl;
|
||||
Ds.Dhop (src,ref,DaggerNo);
|
||||
|
||||
setCheckerboard(r_eo,r_o);
|
||||
setCheckerboard(r_eo,r_e);
|
||||
|
||||
err= ref - r_eo;
|
||||
std::cout<<GridLogMessage << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
FermionField chi_e (FrbGrid);
|
||||
FermionField chi_o (FrbGrid);
|
||||
|
||||
FermionField dchi_e (FrbGrid);
|
||||
FermionField dchi_o (FrbGrid);
|
||||
|
||||
FermionField phi_e (FrbGrid);
|
||||
FermionField phi_o (FrbGrid);
|
||||
|
||||
FermionField dphi_e (FrbGrid);
|
||||
FermionField dphi_o (FrbGrid);
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
Ds.Meooe(chi_e,dchi_o);
|
||||
Ds.Meooe(chi_o,dchi_e);
|
||||
Ds.MeooeDag(phi_e,dphi_o);
|
||||
Ds.MeooeDag(phi_o,dphi_e);
|
||||
|
||||
ComplexD pDce = innerProduct(phi_e,dchi_e);
|
||||
ComplexD pDco = innerProduct(phi_o,dchi_o);
|
||||
ComplexD cDpe = innerProduct(chi_e,dphi_e);
|
||||
ComplexD cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MeeInv Mee = 1 "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Ds.Mooee(chi_e,src_e);
|
||||
Ds.MooeeInv(src_e,phi_e);
|
||||
|
||||
Ds.Mooee(chi_o,src_o);
|
||||
Ds.MooeeInv(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Ds.MooeeDag(chi_e,src_e);
|
||||
Ds.MooeeInvDag(src_e,phi_e);
|
||||
|
||||
Ds.MooeeDag(chi_o,src_o);
|
||||
Ds.MooeeInvDag(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MpcDagMpc is Hermitian "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
random(pRNG5,phi);
|
||||
random(pRNG5,chi);
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
SchurDiagMooeeOperator<ImprovedStaggeredFermion5DR,FermionField> HermOpEO(Ds);
|
||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
||||
|
||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
||||
|
||||
pDce = innerProduct(phi_e,dchi_e);
|
||||
pDco = innerProduct(phi_o,dchi_o);
|
||||
cDpe = innerProduct(chi_e,dphi_e);
|
||||
cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
tests/core/Test_staggered5Dvec.cc (new file, 184 lines)
@@ -0,0 +1,184 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/core/Test_staggered5Dvec.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
const int Ls=16;
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
||||
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
|
||||
GridParallelRNG pRNG4(UGrid);
|
||||
GridParallelRNG pRNG5(FGrid);
|
||||
pRNG4.SeedFixedIntegers(seeds);
|
||||
pRNG5.SeedFixedIntegers(seeds);
|
||||
|
||||
typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;
|
||||
typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;
|
||||
typename ImprovedStaggeredFermion5DR::ImplParams params;
|
||||
|
||||
FermionField src (FGrid);
|
||||
random(pRNG5,src);
|
||||
/*
|
||||
std::vector<int> site({0,1,2,0,0});
|
||||
ColourVector cv = zero;
|
||||
cv()()(0)=1.0;
|
||||
src = zero;
|
||||
pokeSite(cv,src,site);
|
||||
*/
|
||||
FermionField result(FGrid); result=zero;
|
||||
FermionField tmp(FGrid); tmp=zero;
|
||||
FermionField err(FGrid); err=zero;
|
||||
FermionField phi (FGrid); random(pRNG5,phi);
|
||||
FermionField chi (FGrid); random(pRNG5,chi);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(pRNG4,Umu);
|
||||
|
||||
/*
|
||||
for(int mu=1;mu<4;mu++){
|
||||
auto tmp = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
tmp = zero;
|
||||
PokeIndex<LorentzIndex>(Umu,tmp,mu);
|
||||
}
|
||||
*/
|
||||
double volume=Ls;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD c1=9.0/8.0;
|
||||
RealD c2=-1.0/24.0;
|
||||
RealD u0=1.0;
|
||||
|
||||
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0,params);
|
||||
ImprovedStaggeredFermionVec5dR sDs(Umu,Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,c1,c2,u0,params);
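// Ds uses the standard 5d layout, while sDs uses the Ls-vectorised layout with
// the s-direction packed into SIMD lanes, hence the localConvert calls below
// when comparing fields between the two.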
|
||||
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
|
||||
int ncall=1000;
|
||||
int ncall1=1000;
|
||||
double t0(0),t1(0);
|
||||
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // = 16*66 + 90 = 1146 flops per site per call
|
||||
|
||||
std::cout<<GridLogMessage << "Calling staggered operator"<<std::endl;
|
||||
t0=usecond();
|
||||
for(int i=0;i<ncall1;i++){
|
||||
Ds.Dhop(src,result,0);
|
||||
}
|
||||
t1=usecond();
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "Calling vectorised staggered operator"<<std::endl;
|
||||
|
||||
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
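// The static Opt flag chooses which StaggeredKernels implementation (e.g.
// hand-unrolled or inline assembly) subsequent Dhop calls dispatch to, where
// that variant is available for the chosen layout.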
|
||||
t0=usecond();
|
||||
for(int i=0;i<ncall1;i++){
|
||||
Ds.Dhop(src,tmp,0);
|
||||
}
|
||||
t1=usecond();
|
||||
|
||||
std::cout<<GridLogMessage << "Called Ds ASM"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(tmp)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
|
||||
err = tmp-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
|
||||
FermionField ssrc (sFGrid); localConvert(src,ssrc);
|
||||
FermionField sresult(sFGrid); sresult=zero;
|
||||
|
||||
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll;
|
||||
t0=usecond();
|
||||
for(int i=0;i<ncall1;i++){
|
||||
sDs.Dhop(ssrc,sresult,0);
|
||||
}
|
||||
t1=usecond();
|
||||
localConvert(sresult,tmp);
|
||||
|
||||
std::cout<<GridLogMessage << "Called sDs unroll"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(sresult)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
|
||||
|
||||
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
|
||||
|
||||
err = tmp-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
int extra=1;
|
||||
t0=usecond();
|
||||
for(int i=0;i<ncall1*extra;i++){
|
||||
sDs.Dhop(ssrc,sresult,0);
|
||||
}
|
||||
t1=usecond();
|
||||
localConvert(sresult,tmp);
|
||||
|
||||
std::cout<<GridLogMessage << "Called sDs asm"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(sresult)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)*extra<<std::endl;
|
||||
|
||||
err = tmp-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -71,7 +71,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(&Grid); ref=zero;
|
||||
LatticeFermion tmp(&Grid); tmp=zero;
|
||||
LatticeFermion err(&Grid); tmp=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
|
@ -70,7 +70,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(&Grid); ref=zero;
|
||||
LatticeFermion tmp(&Grid); tmp=zero;
|
||||
LatticeFermion err(&Grid); tmp=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
|
@ -77,7 +77,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
#if 0
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
@ -70,7 +70,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -187,7 +187,7 @@ int main(int argc,char **argv)
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
random(RNG5,src);
|
||||
#if 1
|
||||
random(RNG4,Umu);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
#else
|
||||
int mmu=2;
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -94,7 +94,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -65,7 +65,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
|
@ -60,7 +60,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
RealD nrm = norm2(src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
|
@ -57,7 +57,7 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
|
@ -60,7 +60,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
RealD nrm = norm2(src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
|
||||
double volume=1;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -60,7 +60,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
RealD nrm = norm2(src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);