1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Internal SHM comms in non-simd directions working

Need to fix simd directions
This commit is contained in:
azusayamaguchi 2016-10-22 18:14:27 +01:00
parent 0fcd2e7188
commit c190221fd3
16 changed files with 1729 additions and 1739 deletions

View File

@ -153,7 +153,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl; std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
err = ref-result; err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl; std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Dw.Report(); Dw.Report();
@ -192,7 +192,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
sDw.Report(); sDw.Report();
if(0){ if(0){
@ -262,7 +262,7 @@ int main (int argc, char ** argv)
double flops=(1344.0*volume*ncall)/2; double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
sDw.Report(); sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@ -333,7 +333,7 @@ int main (int argc, char ** argv)
double flops=(1344.0*volume*ncall)/2; double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
Dw.Report(); Dw.Report();
} }
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);

File diff suppressed because it is too large Load Diff

View File

@ -80,7 +80,6 @@ class CartesianCommunicator {
void * ShmCommBuf; void * ShmCommBuf;
std::vector<void *> ShmCommBufs; std::vector<void *> ShmCommBufs;
std::vector<void *> ShmStencilBufs;
int WorldRank; int WorldRank;
int WorldSize; int WorldSize;
@ -105,6 +104,10 @@ class CartesianCommunicator {
int RankFromProcessorCoor(std::vector<int> &coor); int RankFromProcessorCoor(std::vector<int> &coor);
void ProcessorCoorFromRank(int rank,std::vector<int> &coor); void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
// Helper function for SHM Windows in MPI3
void *ShmBufferSelf(void);
void *ShmBuffer(int rank);
///////////////////////////////// /////////////////////////////////
// Grid information queries // Grid information queries
///////////////////////////////// /////////////////////////////////
@ -173,6 +176,16 @@ class CartesianCommunicator {
int recv_from_rank, int recv_from_rank,
int bytes); int bytes);
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
{
SendToRecvFromComplete(waitall);
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
// Barrier // Barrier

View File

@ -67,6 +67,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
assert(Size==_Nprocessors); assert(Size==_Nprocessors);
} }
void *CartesianCommunicator::ShmBufferSelf(void)
{
return NULL;
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
return NULL;
}
void CartesianCommunicator::GlobalSum(uint32_t &u){ void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);

View File

@ -197,10 +197,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Verbose for now // Verbose for now
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout<< "Ranks per node "<< ShmSize << std::endl; std::cout<<GridLogMessage<< "MPI-3 configuration: Ranks per node "<< ShmSize ;
std::cout<< "Nodes "<< GroupSize << std::endl; std::cout<< " Nodes "<< GroupSize;
std::cout<< "Ranks "<< WorldSize << std::endl; std::cout<< " Ranks "<< WorldSize;
std::cout<< "Shm CommBuf "<< ShmCommBuf << std::endl; std::cout<< " Shm CommBuf address"<< std::hex <<ShmCommBuf << std::dec<<std::endl;
// Done // Done
ShmSetup=1; ShmSetup=1;
@ -208,12 +208,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
} }
ShmCommBufs.resize(ShmSize); ShmCommBufs.resize(ShmSize);
ShmStencilBufs.resize(ShmSize);
for(int r=0;r<ShmSize;r++){ for(int r=0;r<ShmSize;r++){
MPI_Aint sz; MPI_Aint sz;
int dsp_unit; int dsp_unit;
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]); MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
ShmStencilBufs[r] = (void *) ((uint64_t)ShmCommBufs[r]+MAX_MPI_SHM_BYTES/4);
} }
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
@ -240,6 +238,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
ShmCoor.resize(_ndimension); ShmCoor.resize(_ndimension);
GroupCoor.resize(_ndimension); GroupCoor.resize(_ndimension);
WorldCoor.resize(_ndimension); WorldCoor.resize(_ndimension);
for(int l2=0;l2<log2size;l2++){ for(int l2=0;l2<log2size;l2++){
while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension; while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
ShmDims[dim]*=2; ShmDims[dim]*=2;
@ -347,6 +346,21 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
} }
} }
void *CartesianCommunicator::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
int gpeer = GroupRanks[rank];
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
return ShmCommBufs[gpeer];
}
}
// Basic Halo comms primitive // Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit, void *xmit,
@ -355,13 +369,11 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from, int from,
int bytes) int bytes)
{ {
#undef SHM_USE_BCOPY
MPI_Request xrq; MPI_Request xrq;
MPI_Request rrq; MPI_Request rrq;
static int sequence; static int sequence;
int rank = _processor;
int ierr; int ierr;
int tag; int tag;
int check; int check;
@ -370,6 +382,7 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
assert(from != _processor); assert(from != _processor);
int gdest = GroupRanks[dest]; int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme = GroupRanks[_processor]; int gme = GroupRanks[_processor];
sequence++; sequence++;
@ -379,30 +392,23 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int small = (bytes<MAX_MPI_SHM_BYTES); int small = (bytes<MAX_MPI_SHM_BYTES);
#ifndef SHM_USE_BCOPY
typedef vRealD T; typedef vRealD T;
int words = bytes/sizeof(T); int words = bytes/sizeof(T);
assert(((size_t)bytes &(sizeof(T)-1))==0);
// assert(((size_t)xmit &(sizeof(T)-1))==0);
// assert(((size_t)recv &(sizeof(T)-1))==0);
#endif
assert(((size_t)bytes &(sizeof(T)-1))==0);
assert(gme == ShmRank); assert(gme == ShmRank);
// std::cerr << "proc dest from gme gdest "<<_processor<<" "<<dest <<" "<< from <<" "<<gme<<" "<< gdest<<std::endl; Barrier(); if ( small && (gdest !=MPI_UNDEFINED) ) {
if ( small && (dest !=MPI_UNDEFINED) ) {
assert(gme != gdest); assert(gme != gdest);
#ifdef SHM_USE_BCOPY
bcopy(xmit,to_ptr,bytes);
#else
T *ip = (T *)xmit; T *ip = (T *)xmit;
T *op = (T *)to_ptr; T *op = (T *)to_ptr;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) { for(int w=0;w<words;w++) {
vstream(op[w],ip[w]); vstream(op[w],ip[w]);
} }
#endif
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor)); bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence)); bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence));
} else { } else {
@ -411,24 +417,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
list.push_back(xrq); list.push_back(xrq);
} }
// std::cout << "Syncing "<<std::endl; Barrier();
MPI_Win_sync (ShmWindow); MPI_Win_sync (ShmWindow);
MPI_Barrier (ShmComm); MPI_Barrier (ShmComm);
MPI_Win_sync (ShmWindow); MPI_Win_sync (ShmWindow);
// std::cout << "Receiving "<<std::endl; Barrier(); if (small && (gfrom !=MPI_UNDEFINED) ) {
if (small && (from !=MPI_UNDEFINED) ) {
#ifdef SHM_USE_BCOPY
bcopy(from_ptr,recv,bytes);
#else
T *ip = (T *)from_ptr; T *ip = (T *)from_ptr;
T *op = (T *)recv; T *op = (T *)recv;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) { for(int w=0;w<words;w++) {
vstream(op[w],ip[w]); vstream(op[w],ip[w]);
} }
#endif
bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag)); bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
bcopy(&from_ptr[bytes+4],&check,sizeof(check)); bcopy(&from_ptr[bytes+4],&check,sizeof(check));
assert(check==sequence); assert(check==sequence);
@ -438,28 +437,52 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
assert(ierr==0); assert(ierr==0);
list.push_back(rrq); list.push_back(rrq);
} }
// std::cout << "Syncing"<<std::endl; Barrier();
MPI_Win_sync (ShmWindow); MPI_Win_sync (ShmWindow);
MPI_Barrier (ShmComm); MPI_Barrier (ShmComm);
MPI_Win_sync (ShmWindow); MPI_Win_sync (ShmWindow);
}
#if 0 void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq; MPI_Request xrq;
MPI_Request rrq; MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq); int ierr;
list.push_back(rrq);
#endif assert(dest != _processor);
assert(from != _processor);
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme = GroupRanks[_processor];
assert(gme == ShmRank);
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
}
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
}
MPI_Win_sync (ShmWindow);
MPI_Barrier (ShmComm);
MPI_Win_sync (ShmWindow);
} }
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{ {
int nreq=list.size(); int nreq=list.size();

View File

@ -33,6 +33,14 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
} }
int Rank(void ){ return 0; }; int Rank(void ){ return 0; };
void *CartesianCommunicator::ShmBufferSelf(void)
{
return NULL;
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
return NULL;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{ {

View File

@ -50,6 +50,14 @@ typedef struct HandShake_t {
static Vector< HandShake > XConnections; static Vector< HandShake > XConnections;
static Vector< HandShake > RConnections; static Vector< HandShake > RConnections;
void *CartesianCommunicator::ShmBufferSelf(void)
{
return NULL;
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
return NULL;
}
void CartesianCommunicator::Init(int *argc, char ***argv) { void CartesianCommunicator::Init(int *argc, char ***argv) {
shmem_init(); shmem_init();
XConnections.resize(shmem_n_pes()); XConnections.resize(shmem_n_pes());

View File

@ -33,511 +33,500 @@ directory
#define GRID_QCD_FERMION_OPERATOR_IMPL_H #define GRID_QCD_FERMION_OPERATOR_IMPL_H
namespace Grid { namespace Grid {
namespace QCD {
namespace QCD {
////////////////////////////////////////////// //////////////////////////////////////////////
// Template parameter class constructs to package // Template parameter class constructs to package
// externally control Fermion implementations // externally control Fermion implementations
// in orthogonal directions // in orthogonal directions
// //
// Ultimately need Impl to always define types where XXX is opaque // Ultimately need Impl to always define types where XXX is opaque
// //
// typedef typename XXX Simd; // typedef typename XXX Simd;
// typedef typename XXX GaugeLinkField; // typedef typename XXX GaugeLinkField;
// typedef typename XXX GaugeField; // typedef typename XXX GaugeField;
// typedef typename XXX GaugeActField; // typedef typename XXX GaugeActField;
// typedef typename XXX FermionField; // typedef typename XXX FermionField;
// typedef typename XXX DoubledGaugeField; // typedef typename XXX DoubledGaugeField;
// typedef typename XXX SiteSpinor; // typedef typename XXX SiteSpinor;
// typedef typename XXX SiteHalfSpinor; // typedef typename XXX SiteHalfSpinor;
// typedef typename XXX Compressor; // typedef typename XXX Compressor;
// //
// and Methods: // and Methods:
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) // void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) // void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) // void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) // void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) // void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
// //
// //
// To acquire the typedefs from "Base" (either a base class or template param) use: // To acquire the typedefs from "Base" (either a base class or template param) use:
// //
// INHERIT_GIMPL_TYPES(Base) // INHERIT_GIMPL_TYPES(Base)
// INHERIT_FIMPL_TYPES(Base) // INHERIT_FIMPL_TYPES(Base)
// INHERIT_IMPL_TYPES(Base) // INHERIT_IMPL_TYPES(Base)
// //
// The Fermion operators will do the following: // The Fermion operators will do the following:
// //
// struct MyOpParams { // struct MyOpParams {
// RealD mass; // RealD mass;
// }; // };
// //
// //
// template<class Impl> // template<class Impl>
// class MyOp : public<Impl> { // class MyOp : public<Impl> {
// public: // public:
// //
// INHERIT_ALL_IMPL_TYPES(Impl); // INHERIT_ALL_IMPL_TYPES(Impl);
// //
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam) // MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
// { // {
// //
// }; // };
// //
// } // }
////////////////////////////////////////////// //////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// Implementation dependent fermion types
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// Implementation dependent fermion types
////////////////////////////////////////////////////////////////////////
#define INHERIT_FIMPL_TYPES(Impl)\ #define INHERIT_FIMPL_TYPES(Impl)\
typedef typename Impl::FermionField FermionField; \ typedef typename Impl::FermionField FermionField; \
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \ typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
typedef typename Impl::SiteSpinor SiteSpinor; \ typedef typename Impl::SiteSpinor SiteSpinor; \
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
typedef typename Impl::Compressor Compressor; \ typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; \ typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t; typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \ INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base) INHERIT_FIMPL_TYPES(Base)
/////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
/////// const bool LsVectorised=false;
// Single flavour four spinors with colour index typedef _Coeff_t Coeff_t;
///////
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
class WilsonImpl
: public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false; INHERIT_GIMPL_TYPES(Gimpl);
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >; template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >; template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
ImplParams Params;
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
typedef iImplSpinor<Simd> SiteSpinor; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField; inline void multLink(SiteHalfSpinor &phi,
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi,
int mu,
StencilEntry *SE,
StencilImpl &St) {
mult(&phi(), &U(mu), &chi());
}
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; template <class ref>
typedef WilsonImplParams ImplParams; inline void loadLinkElement(Simd &reg, ref &memory) {
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; reg = memory;
}
ImplParams Params; inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds,
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; const GaugeField &Umu) {
conformable(Uds._grid, GaugeGrid);
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; conformable(Umu._grid, GaugeGrid);
GaugeLinkField U(GaugeGrid);
inline void multLink(SiteHalfSpinor &phi, for (int mu = 0; mu < Nd; mu++) {
const SiteDoubledGaugeField &U, U = PeekIndex<LorentzIndex>(Umu, mu);
const SiteHalfSpinor &chi, PokeIndex<LorentzIndex>(Uds, U, mu);
int mu, U = adj(Cshift(U, mu, -1));
StencilEntry *SE, PokeIndex<LorentzIndex>(Uds, U, mu + 4);
StencilImpl &St) {
mult(&phi(), &U(mu), &chi());
} }
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat._grid);
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
template <class ref> inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
inline void loadLinkElement(Simd &reg,
ref &memory) {
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid, int Ls=Btilde._grid->_fdimensions[0];
DoubledGaugeField &Uds, GaugeLinkField tmp(mat._grid);
const GaugeField &Umu) { tmp = zero;
conformable(Uds._grid, GaugeGrid);
conformable(Umu._grid, GaugeGrid); PARALLEL_FOR_LOOP
GaugeLinkField U(GaugeGrid); for(int sss=0;sss<tmp._grid->oSites();sss++){
for (int mu = 0; mu < Nd; mu++) { int sU=sss;
U = PeekIndex<LorentzIndex>(Umu, mu); for(int s=0;s<Ls;s++){
PokeIndex<LorentzIndex>(Uds, U, mu); int sF = s+Ls*sU;
U = adj(Cshift(U, mu, -1)); tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
} }
} }
PokeIndex<LorentzIndex>(mat,tmp,mu);
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat._grid);
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ }
};
int Ls=Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid);
tmp = zero;
PARALLEL_FOR_LOOP ////////////////////////////////////////////////////////////////////////////////////
for(int sss=0;sss<tmp._grid->oSites();sss++){ // Single flavour four spinors with colour index, 5d redblack
int sU=sss; ////////////////////////////////////////////////////////////////////////////////////
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
}
PokeIndex<LorentzIndex>(mat,tmp,mu);
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public:
static const int Dimension = Nrepresentation;
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteSpinor> FermionField;
// Make the doubled gauge field a *scalar*
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
ImplParams Params;
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
bool overlapCommsCompute(void) { return false; };
template <class ref>
inline void loadLinkElement(Simd &reg, ref &memory) {
vsplat(reg, memory);
}
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
SiteGaugeLink UU;
for (int i = 0; i < Nrepresentation; i++) {
for (int j = 0; j < Nrepresentation; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
} }
}; }
mult(&phi(), &UU(), &chi());
/////// }
// Single flavour four spinors with colour index, 5d redblack
///////
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public:
static const int Dimension = Nrepresentation; inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
const bool LsVectorised=true; {
typedef _Coeff_t Coeff_t; SiteScalarGaugeField ScalarUmu;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl; SiteDoubledGaugeField ScalarUds;
INHERIT_GIMPL_TYPES(Gimpl); GaugeLinkField U(Umu._grid);
GaugeField Uadj(Umu._grid);
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uadj, U, mu);
}
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; peekLocalSite(ScalarUmu, Umu, lcoor);
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
typedef iImplSpinor<Simd> SiteSpinor; peekLocalSite(ScalarUmu, Uadj, lcoor);
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
typedef Lattice<SiteSpinor> FermionField;
// Make the doubled gauge field a *scalar* pokeLocalSite(ScalarUds, Uds, lcoor);
typedef iImplDoubledGaugeField<typename Simd::scalar_type> }
SiteDoubledGaugeField; // This is a scalar }
typedef iImplGaugeField<typename Simd::scalar_type>
SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type>
SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
{
assert(0);
}
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField &Atilde, int mu)
typedef WilsonImplParams ImplParams; {
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
ImplParams Params;
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
bool overlapCommsCompute(void) { return false; };
template <class ref>
inline void loadLinkElement(Simd &reg, ref &memory) {
vsplat(reg, memory);
}
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
SiteGaugeLink UU;
for (int i = 0; i < Nrepresentation; i++) {
for (int j = 0; j < Nrepresentation; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
}
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
const GaugeField &Umu) {
SiteScalarGaugeField ScalarUmu;
SiteDoubledGaugeField ScalarUds;
GaugeLinkField U(Umu._grid);
GaugeField Uadj(Umu._grid);
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uadj, U, mu);
}
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUmu, Umu, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
peekLocalSite(ScalarUmu, Uadj, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
pokeLocalSite(ScalarUds, Uds, lcoor);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
FermionField &A, int mu) {
assert(0); assert(0);
} }
};
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
FermionField &Atilde, int mu) {
assert(0);
}
};
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
// Flavour doubled spinors; is Gparity the only? what about C*? // Flavour doubled spinors; is Gparity the only? what about C*?
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
template <class S, int Nrepresentation,class _Coeff_t = RealD> template <class S, int Nrepresentation,class _Coeff_t = RealD>
class GparityWilsonImpl class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
: public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > { public:
public:
static const int Dimension = Nrepresentation;
const bool LsVectorised=false; static const int Dimension = Nrepresentation;
typedef _Coeff_t Coeff_t; const bool LsVectorised=false;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
INHERIT_GIMPL_TYPES(Gimpl); template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
template <typename vtype> typedef iImplSpinor<Simd> SiteSpinor;
using iImplSpinor = typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
template <typename vtype>
using iImplHalfSpinor = typedef Lattice<SiteSpinor> FermionField;
iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
template <typename vtype>
using iImplDoubledGaugeField = typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>; typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
typedef GparityWilsonImplParams ImplParams;
typedef iImplSpinor<Simd> SiteSpinor; ImplParams Params;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
typedef GparityWilsonImplParams ImplParams; GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
ImplParams Params;
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; // provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
// provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
vobj vtmp; vobj vtmp;
sobj stmp; sobj stmp;
GridBase *grid = St._grid; GridBase *grid = St._grid;
const int Nsimd = grid->Nsimd(); const int Nsimd = grid->Nsimd();
int direction = St._directions[mu]; int direction = St._directions[mu];
int distance = St._distances[mu]; int distance = St._distances[mu];
int ptype = St._permute_type[mu]; int ptype = St._permute_type[mu];
int sl = St._grid->_simd_layout[direction]; int sl = St._grid->_simd_layout[direction];
// Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd;
// Fixme X.Y.Z.T hardcode in stencil // assert our assumptions
int mmu = mu % Nd; assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
std::vector<int> icoor;
// assert our assumptions if ( SE->_around_the_world && Params.twists[mmu] ) {
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
std::vector<int> icoor;
if ( SE->_around_the_world && Params.twists[mmu] ) {
if ( sl == 2 ) { if ( sl == 2 ) {
std::vector<sobj> vals(Nsimd);
std::vector<sobj> vals(Nsimd); extract(chi,vals);
for(int s=0;s<Nsimd;s++){
extract(chi,vals); grid->iCoorFromIindex(icoor,s);
for(int s=0;s<Nsimd;s++){
grid->iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1)); assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane; int permute_lane;
if ( distance == 1) { if ( distance == 1) {
permute_lane = icoor[direction]?1:0; permute_lane = icoor[direction]?1:0;
} else { } else {
permute_lane = icoor[direction]?0:1; permute_lane = icoor[direction]?0:1;
}
if ( permute_lane ) {
stmp(0) = vals[s](1);
stmp(1) = vals[s](0);
vals[s] = stmp;
} }
}
if ( permute_lane ) { merge(vtmp,vals);
stmp(0) = vals[s](1);
stmp(1) = vals[s](0); } else {
vals[s] = stmp; vtmp(0) = chi(1);
} vtmp(1) = chi(0);
} }
merge(vtmp,vals); mult(&phi(0),&U(0)(mu),&vtmp(0));
mult(&phi(1),&U(1)(mu),&vtmp(1));
} else {
mult(&phi(0),&U(0)(mu),&chi(0));
mult(&phi(1),&U(1)(mu),&chi(1));
}
}
} else { inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
vtmp(0) = chi(1); {
vtmp(1) = chi(0); conformable(Uds._grid,GaugeGrid);
} conformable(Umu._grid,GaugeGrid);
mult(&phi(0),&U(0)(mu),&vtmp(0));
mult(&phi(1),&U(1)(mu),&vtmp(1)); GaugeLinkField Utmp (GaugeGrid);
GaugeLinkField U (GaugeGrid);
} else { GaugeLinkField Uconj(GaugeGrid);
mult(&phi(0),&U(0)(mu),&chi(0));
mult(&phi(1),&U(1)(mu),&chi(1)); Lattice<iScalar<vInteger> > coor(GaugeGrid);
}
} for(int mu=0;mu<Nd;mu++){
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds._grid,GaugeGrid);
conformable(Umu._grid,GaugeGrid);
GaugeLinkField Utmp (GaugeGrid);
GaugeLinkField U (GaugeGrid);
GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu); LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu); U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U); Uconj = conjugate(U);
// This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) {
Uconj = where(coor==neglink,-Uconj,Uconj);
}
// This phase could come from a simple bc 1,1,-1,1 .. PARALLEL_FOR_LOOP
int neglink = GaugeGrid->GlobalDimensions()[mu]-1; for(auto ss=U.begin();ss<U.end();ss++){
if ( Params.twists[mu] ) { Uds[ss](0)(mu) = U[ss]();
Uconj = where(coor==neglink,-Uconj,Uconj); Uds[ss](1)(mu) = Uconj[ss]();
} }
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1));
Utmp = U;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,Uconj,Utmp);
}
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){ for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu) = U[ss](); Uds[ss](0)(mu+4) = Utmp[ss]();
Uds[ss](1)(mu) = Uconj[ss](); }
}
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary Utmp = Uconj;
Uconj = adj(Cshift(Uconj,mu,-1)); if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp);
}
Utmp = U; PARALLEL_FOR_LOOP
if ( Params.twists[mu] ) { for(auto ss=U.begin();ss<U.end();ss++){
Utmp = where(coor==0,Uconj,Utmp); Uds[ss](1)(mu+4) = Utmp[ss]();
} }
PARALLEL_FOR_LOOP }
for(auto ss=U.begin();ss<U.end();ss++){ }
Uds[ss](0)(mu+4) = Utmp[ss]();
}
Utmp = Uconj;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp);
}
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss]();
}
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
FermionField &A, int mu) {
// DhopDir provides U or Uconj depending on coor/flavour. // DhopDir provides U or Uconj depending on coor/flavour.
GaugeLinkField link(mat._grid); GaugeLinkField link(mat._grid);
// use lorentz for flavour as hack. // use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A)); auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (auto ss = tmp.begin(); ss < tmp.end(); ss++) { for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1)); link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
} }
PokeIndex<LorentzIndex>(mat, link, mu); PokeIndex<LorentzIndex>(mat, link, mu);
return; return;
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
FermionField &Atilde, int mu) {
int Ls = Btilde._grid->_fdimensions[0]; int Ls = Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int ss = 0; ss < tmp._grid->oSites(); ss++) { for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss; int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])); auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
} }
} }
PokeIndex<LorentzIndex>(mat, tmp, mu); PokeIndex<LorentzIndex>(mat, tmp, mu);
return; return;
} }
};
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec };
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
typedef GparityWilsonImpl<vComplex , Nc> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec }}
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
typedef GparityWilsonImpl<vComplex, Nc> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
}
}
#endif #endif

View File

@ -166,7 +166,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
//////////////////////// ////////////////////////
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int sss = 0; sss < B._grid->oSites(); sss++) { for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu, Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu,
gamma); gamma);
} }
@ -277,7 +277,7 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) { for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out, Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out,
dirdisp, gamma); dirdisp, gamma);
} }
}; };
@ -295,13 +295,13 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
if (dag == DaggerYes) { if (dag == DaggerYes) {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) { for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in, Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
out); out);
} }
} else { } else {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) { for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in, Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
out); out);
} }
} }

View File

@ -184,44 +184,37 @@ void WilsonFermion5D<Impl>::Report(void)
if ( DhopCalls > 0 ) { if ( DhopCalls > 0 ) {
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl; std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime<< " us" << std::endl;
<< " us" << std::endl; std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " << DhopComputeTime << " us" << std::endl;
<< DhopCommTime / DhopCalls << " us" << std::endl; std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : "
<< DhopComputeTime << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : "
<< DhopComputeTime / DhopCalls << " us" << std::endl;
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
} }
if ( DerivCalls > 0 ) { if ( DerivCalls > 0 ) {
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl; std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl; std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl; std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl; std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl; std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl; std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl; std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl; std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime; std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
} }
if (DerivCalls > 0 || DhopCalls > 0){ if (DerivCalls > 0 || DhopCalls > 0){
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report(); std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report(); std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report(); std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
} }
} }
@ -275,7 +268,7 @@ PARALLEL_FOR_LOOP
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sU=ss; int sU=ss;
int sF = s+Ls*sU; int sF = s+Ls*sU;
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sF,sU,in,out,dirdisp,gamma); Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
} }
} }
}; };
@ -327,8 +320,7 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
assert(sF < B._grid->oSites()); assert(sF < B._grid->oSites());
assert(sU < U._grid->oSites()); assert(sU < U._grid->oSites());
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu, Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
gamma);
//////////////////////////// ////////////////////////////
// spin trace outer product // spin trace outer product
@ -342,10 +334,10 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
} }
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat, void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionGrid()); conformable(A._grid,FermionGrid());
conformable(A._grid,B._grid); conformable(A._grid,B._grid);
@ -358,9 +350,9 @@ void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid); conformable(GaugeRedBlackGrid(),mat._grid);
@ -376,9 +368,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid); conformable(GaugeRedBlackGrid(),mat._grid);
@ -393,8 +385,8 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U, DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag) const FermionField &in, FermionField &out,int dag)
{ {
// assert((dag==DaggerNo) ||(dag==DaggerYes)); // assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag); Compressor compressor(dag);
@ -412,27 +404,25 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
for (int ss = 0; ss < U._grid->oSites(); ss++) { for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sF, sU, LLs, 1, in, out);
out);
} }
#ifdef AVX512 #ifdef AVX512
} else if (stat.is_init() ) { } else if (stat.is_init() ) {
int nthreads; int nthreads;
stat.start(); stat.start();
#pragma omp parallel #pragma omp parallel
{ {
#pragma omp master #pragma omp master
nthreads = omp_get_num_threads(); nthreads = omp_get_num_threads();
int mythread = omp_get_thread_num(); int mythread = omp_get_thread_num();
stat.enter(mythread); stat.enter(mythread);
#pragma omp for nowait #pragma omp for nowait
for(int ss=0;ss<U._grid->oSites();ss++) for(int ss=0;ss<U._grid->oSites();ss++) {
{ int sU=ss;
int sU=ss; int sF=LLs*sU;
int sF=LLs*sU; Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); }
}
stat.exit(mythread); stat.exit(mythread);
} }
stat.accum(nthreads); stat.accum(nthreads);
@ -442,8 +432,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
for (int ss = 0; ss < U._grid->oSites(); ss++) { for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
out);
} }
} }
DhopComputeTime+=usecond(); DhopComputeTime+=usecond();

View File

@ -34,155 +34,154 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/Stat.h> #include <Grid/Stat.h>
namespace Grid { namespace Grid {
namespace QCD {
namespace QCD { ////////////////////////////////////////////////////////////////////////////////
// This is the 4d red black case appropriate to support
//
// parity = (x+y+z+t)|2;
// generalised five dim fermions like mobius, zolotarev etc..
//
// i.e. even even contains fifth dim hopping term.
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////// class WilsonFermion5DStatic {
// This is the 4d red black case appropriate to support public:
// // S-direction is INNERMOST and takes no part in the parity.
// parity = (x+y+z+t)|2; static const std::vector<int> directions;
// generalised five dim fermions like mobius, zolotarev etc.. static const std::vector<int> displacements;
// const int npoint = 8;
// i.e. even even contains fifth dim hopping term. };
//
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ] template<class Impl>
//////////////////////////////////////////////////////////////////////////////// class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
{
class WilsonFermion5DStatic { public:
public: INHERIT_IMPL_TYPES(Impl);
// S-direction is INNERMOST and takes no part in the parity. typedef WilsonKernels<Impl> Kernels;
static const std::vector<int> directions; PmuStat stat;
static const std::vector<int> displacements;
const int npoint = 8; void Report(void);
}; void ZeroCounters(void);
double DhopCalls;
template<class Impl> double DhopCommTime;
class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic double DhopComputeTime;
{
public: double DerivCalls;
INHERIT_IMPL_TYPES(Impl); double DerivCommTime;
typedef WilsonKernels<Impl> Kernels; double DerivComputeTime;
PmuStat stat; double DerivDhopComputeTime;
void Report(void); ///////////////////////////////////////////////////////////////
void ZeroCounters(void); // Implement the abstract base
double DhopCalls; ///////////////////////////////////////////////////////////////
double DhopCommTime; GridBase *GaugeGrid(void) { return _FourDimGrid ;}
double DhopComputeTime; GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FiveDimGrid;}
double DerivCalls; GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
double DerivCommTime;
double DerivComputeTime; // full checkerboard operations; leave unimplemented as abstract for now
double DerivDhopComputeTime; virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
///////////////////////////////////////////////////////////////
// Implement the abstract base // half checkerboard operations; leave unimplemented as abstract for now
/////////////////////////////////////////////////////////////// virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
GridBase *GaugeGrid(void) { return _FourDimGrid ;} virtual void Mooee (const FermionField &in, FermionField &out){assert(0);};
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;} virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
GridBase *FermionGrid(void) { return _FiveDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
// full checkerboard operations; leave unimplemented as abstract for now virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;}; virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
// These can be overridden by fancy 5d chiral action
// half checkerboard operations; leave unimplemented as abstract for now virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);}; virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void Mooee (const FermionField &in, FermionField &out){assert(0);}; virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
// Implement hopping term non-hermitian hopping term; half cb or both
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);}; // Implement s-diagonal DW
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);}; void DW (const FermionField &in, FermionField &out,int dag);
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);}; void Dhop (const FermionField &in, FermionField &out,int dag);
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); // add a DhopComm
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
void DW (const FermionField &in, FermionField &out,int dag);
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms. // -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// New methods added // New methods added
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st, void DerivInternal(StencilImpl & st,
DoubledGaugeField & U, DoubledGaugeField & U,
GaugeField &mat, GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag); int dag);
void DhopInternal(StencilImpl & st, void DhopInternal(StencilImpl & st,
LebesgueOrder &lo, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &U,
const FermionField &in, const FermionField &in,
FermionField &out, FermionField &out,
int dag); int dag);
// Constructors // Constructors
WilsonFermion5D(GaugeField &_Umu, WilsonFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid, GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid, GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams()); double _M5,const ImplParams &p= ImplParams());
// Constructors // Constructors
/* /*
WilsonFermion5D(int simd, WilsonFermion5D(int simd,
GaugeField &_Umu, GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid, GridCartesian &FourDimGrid,
double _M5,const ImplParams &p= ImplParams()); double _M5,const ImplParams &p= ImplParams());
*/ */
// DoubleStore
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
// Add these to the support from Wilson
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
double M5;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
};
// DoubleStore }}
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
// Add these to the support from Wilson
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
GridBase *_FiveDimGrid;
GridBase *_FiveDimRedBlackGrid;
double M5;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
};
}
}
#endif #endif

View File

@ -43,10 +43,9 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
//////////////////////////////////////////// ////////////////////////////////////////////
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag( void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor *buf, int sF,
commVector<SiteHalfSpinor> &buf, int sF, int sU, const FermionField &in, FermionField &out) {
int sU, const FermionField &in, FermionField &out) {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
SiteHalfSpinor *chi_p; SiteHalfSpinor *chi_p;
@ -220,10 +219,9 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
// Need controls to do interior, exterior, or both // Need controls to do interior, exterior, or both
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DiracOptGenericDhopSite( void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor *buf, int sF,
commVector<SiteHalfSpinor> &buf, int sF, int sU, const FermionField &in, FermionField &out) {
int sU, const FermionField &in, FermionField &out) {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
SiteHalfSpinor *chi_p; SiteHalfSpinor *chi_p;
@ -396,10 +394,9 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(
}; };
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DiracOptDhopDir( void WilsonKernels<Impl>::DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
StencilImpl &st, DoubledGaugeField &U, int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
commVector<SiteHalfSpinor> &buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
SiteSpinor result; SiteSpinor result;

View File

@ -32,175 +32,132 @@ directory
#define GRID_QCD_DHOP_H #define GRID_QCD_DHOP_H
namespace Grid { namespace Grid {
namespace QCD {
namespace QCD { ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Common to both the WilsonFermion and WilsonFermion5D
// Helper routines that implement Wilson stencil for a single site. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Common to both the WilsonFermion and WilsonFermion5D class WilsonKernelsStatic {
//////////////////////////////////////////////////////////////////////////////////////////////////////////////// public:
class WilsonKernelsStatic { // S-direction is INNERMOST and takes no part in the parity.
public: static int AsmOpt; // these are a temporary hack
// S-direction is INNERMOST and takes no part in the parity. static int HandOpt; // these are a temporary hack
static int AsmOpt; // these are a temporary hack };
static int HandOpt; // these are a temporary hack
}; template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public:
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public: INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base; public:
public: template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
template <bool EnableBool = true> DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
#ifdef AVX512 #ifdef AVX512
if (AsmOpt) { if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, WilsonKernels<Impl>::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
in, out); } else {
} else {
#else #else
{ {
#endif #endif
for (int site = 0; site < Ns; site++) { for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
if (HandOpt) if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, WilsonKernels<Impl>::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out);
in, out); else
else WilsonKernels<Impl>::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out);
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, sF++;
in, out);
sF++;
}
sU++;
}
}
} }
sU++;
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
out);
sF++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
Ns, in, out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
sU, in, out);
sF++;
}
sU++;
}
}
}
template <bool EnableBool = true>
typename std::enable_if<
(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
void DiracOptDhopDir(
StencilImpl &st, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
int gamma);
private:
// Specialised variants
void DiracOptGenericDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptGenericDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptAsmDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptHandDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out);
public:
WilsonKernels(const ImplParams &p = ImplParams());
};
} }
} }
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, out);
sF++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
}
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
}
void DiracOptDhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
private:
// Specialised variants
void DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void DiracOptAsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptHandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
public:
WilsonKernels(const ImplParams &p = ImplParams());
};
}}
#endif #endif

View File

@ -33,31 +33,27 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Default to no assembler implementation // Default to no assembler implementation
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
template<class Impl> template<class Impl> void
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) {
{ assert(0);
assert(0); }
}
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
#if defined(AVX512) #if defined(AVX512)
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine // If we are AVX512 specialise the single precision routine
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
@ -65,16 +61,16 @@ namespace Grid {
#include <simd/Intel512wilson.h> #include <simd/Intel512wilson.h>
#include <simd/Intel512single.h> #include <simd/Intel512single.h>
static Vector<vComplexF> signs; static Vector<vComplexF> signs;
int setupSigns(void ){ int setupSigns(void ){
Vector<vComplexF> bother(2); Vector<vComplexF> bother(2);
signs = bother; signs = bother;
vrsign(signs[0]); vrsign(signs[0]);
visign(signs[1]); visign(signs[1]);
return 1; return 1;
} }
static int signInit = setupSigns(); static int signInit = setupSigns();
#define label(A) ilabel(A) #define label(A) ilabel(A)
#define ilabel(A) ".globl\n" #A ":\n" #define ilabel(A) ".globl\n" #A ":\n"
@ -84,17 +80,15 @@ namespace Grid {
#define FX(A) WILSONASM_ ##A #define FX(A) WILSONASM_ ##A
#undef KERNEL_DAG #undef KERNEL_DAG
template<> template<> void
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG #define KERNEL_DAG
template<> template<> void
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef VMOVIDUP #undef VMOVIDUP
@ -109,31 +103,26 @@ namespace Grid {
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
#undef KERNEL_DAG #undef KERNEL_DAG
template<> template<> void
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG #define KERNEL_DAG
template<> template<> void
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif #endif
#define INSTANTIATE_ASM(A)\ #define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
commVector<SiteHalfSpinor> &buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ \
commVector<SiteHalfSpinor> &buf,\ template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
INSTANTIATE_ASM(WilsonImplF); INSTANTIATE_ASM(WilsonImplF);
INSTANTIATE_ASM(WilsonImplD); INSTANTIATE_ASM(WilsonImplD);
INSTANTIATE_ASM(ZWilsonImplF); INSTANTIATE_ASM(ZWilsonImplF);
@ -144,6 +133,6 @@ INSTANTIATE_ASM(DomainWallVec5dImplF);
INSTANTIATE_ASM(DomainWallVec5dImplD); INSTANTIATE_ASM(DomainWallVec5dImplD);
INSTANTIATE_ASM(ZDomainWallVec5dImplF); INSTANTIATE_ASM(ZDomainWallVec5dImplF);
INSTANTIATE_ASM(ZDomainWallVec5dImplD); INSTANTIATE_ASM(ZDomainWallVec5dImplD);
}
} }}

View File

@ -311,10 +311,9 @@ namespace Grid {
namespace QCD { namespace QCD {
template<class Impl> template<class Impl> void
void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int ss,int sU,const FermionField &in, FermionField &out)
int ss,int sU,const FermionField &in, FermionField &out)
{ {
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -554,10 +553,9 @@ namespace QCD {
} }
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int ss,int sU,const FermionField &in, FermionField &out)
int ss,int sU,const FermionField &in, FermionField &out)
{ {
// std::cout << "Hand op Dhop "<<std::endl; // std::cout << "Hand op Dhop "<<std::endl;
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
@ -798,38 +796,35 @@ namespace QCD {
} }
} }
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Specialise Gparity to simple implementation // Specialise Gparity to simple implementation
//////////////////////////////////////////////// ////////////////////////////////////////////////
template<> template<> void
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf, SiteHalfSpinor *buf,
int sF,int sU,const FermionField &in, FermionField &out) int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
} }
template<> template<> void
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
commVector<SiteHalfSpinor> &buf, SiteHalfSpinor *buf,
int sF,int sU,const FermionField &in, FermionField &out) int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
} }
template<> template<> void
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int sF,int sU,const FermionField &in, FermionField &out)
int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
} }
template<> template<> void
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
commVector<SiteHalfSpinor> &buf, int sF,int sU,const FermionField &in, FermionField &out)
int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
} }
@ -840,12 +835,10 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
// Need Nc=3 though // // Need Nc=3 though //
#define INSTANTIATE_THEM(A) \ #define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
commVector<SiteHalfSpinor> &buf,\ int ss,int sU,const FermionField &in, FermionField &out); \
int ss,int sU,const FermionField &in, FermionField &out);\ template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ int ss,int sU,const FermionField &in, FermionField &out);
commVector<SiteHalfSpinor> &buf,\
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF); INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD); INSTANTIATE_THEM(WilsonImplD);

View File

@ -116,7 +116,7 @@ int main (int argc, char ** argv)
else if (SE->_is_local) else if (SE->_is_local)
Check._odata[i] = Foo._odata[SE->_offset]; Check._odata[i] = Foo._odata[SE->_offset];
else else
Check._odata[i] = myStencil.comm_buf[SE->_offset]; Check._odata[i] = myStencil.CommBuf()[SE->_offset];
} }
Real nrmC = norm2(Check); Real nrmC = norm2(Check);
@ -207,7 +207,7 @@ int main (int argc, char ** argv)
else if (SE->_is_local) else if (SE->_is_local)
OCheck._odata[i] = EFoo._odata[SE->_offset]; OCheck._odata[i] = EFoo._odata[SE->_offset];
else else
OCheck._odata[i] = EStencil.comm_buf[SE->_offset]; OCheck._odata[i] = EStencil.CommBuf()[SE->_offset];
} }
for(int i=0;i<ECheck._grid->oSites();i++){ for(int i=0;i<ECheck._grid->oSites();i++){
int permute_type; int permute_type;
@ -220,7 +220,7 @@ int main (int argc, char ** argv)
else if (SE->_is_local) else if (SE->_is_local)
ECheck._odata[i] = OFoo._odata[SE->_offset]; ECheck._odata[i] = OFoo._odata[SE->_offset];
else else
ECheck._odata[i] = OStencil.comm_buf[SE->_offset]; ECheck._odata[i] = OStencil.CommBuf()[SE->_offset];
} }
setCheckerboard(Check,ECheck); setCheckerboard(Check,ECheck);