mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-11 03:46:55 +01:00
Massive compressor rework to support reduced precision comms
This commit is contained in:
@ -35,7 +35,6 @@ directory
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Template parameter class constructs to package
|
||||
// externally control Fermion implementations
|
||||
@ -89,7 +88,53 @@ namespace QCD {
|
||||
//
|
||||
// }
|
||||
//////////////////////////////////////////////
|
||||
|
||||
|
||||
template <class T> struct SamePrecisionMapper {
|
||||
typedef T HigherPrecVector ;
|
||||
typedef T LowerPrecVector ;
|
||||
};
|
||||
template <class T> struct LowerPrecisionMapper { };
|
||||
template <> struct LowerPrecisionMapper<vRealF> {
|
||||
typedef vRealF HigherPrecVector ;
|
||||
typedef vRealH LowerPrecVector ;
|
||||
};
|
||||
template <> struct LowerPrecisionMapper<vRealD> {
|
||||
typedef vRealD HigherPrecVector ;
|
||||
typedef vRealF LowerPrecVector ;
|
||||
};
|
||||
template <> struct LowerPrecisionMapper<vComplexF> {
|
||||
typedef vComplexF HigherPrecVector ;
|
||||
typedef vComplexH LowerPrecVector ;
|
||||
};
|
||||
template <> struct LowerPrecisionMapper<vComplexD> {
|
||||
typedef vComplexD HigherPrecVector ;
|
||||
typedef vComplexF LowerPrecVector ;
|
||||
};
|
||||
|
||||
struct CoeffReal {
|
||||
public:
|
||||
typedef RealD _Coeff_t;
|
||||
static const int Nhcs = 2;
|
||||
template<class Simd> using PrecisionMapper = SamePrecisionMapper<Simd>;
|
||||
};
|
||||
struct CoeffRealHalfComms {
|
||||
public:
|
||||
typedef RealD _Coeff_t;
|
||||
static const int Nhcs = 1;
|
||||
template<class Simd> using PrecisionMapper = LowerPrecisionMapper<Simd>;
|
||||
};
|
||||
struct CoeffComplex {
|
||||
public:
|
||||
typedef ComplexD _Coeff_t;
|
||||
static const int Nhcs = 2;
|
||||
template<class Simd> using PrecisionMapper = SamePrecisionMapper<Simd>;
|
||||
};
|
||||
struct CoeffComplexHalfComms {
|
||||
public:
|
||||
typedef ComplexD _Coeff_t;
|
||||
static const int Nhcs = 1;
|
||||
template<class Simd> using PrecisionMapper = LowerPrecisionMapper<Simd>;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Implementation dependent fermion types
|
||||
@ -114,37 +159,40 @@ namespace QCD {
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour four spinors with colour index
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
|
||||
template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
|
||||
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||
|
||||
public:
|
||||
|
||||
static const int Dimension = Representation::Dimension;
|
||||
static const bool LsVectorised=false;
|
||||
static const int Nhcs = Options::Nhcs;
|
||||
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
//Necessary?
|
||||
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||
|
||||
const bool LsVectorised=false;
|
||||
typedef _Coeff_t Coeff_t;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
typedef typename Options::_Coeff_t Coeff_t;
|
||||
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
|
||||
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
|
||||
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplPropagator<Simd> SitePropagator;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
|
||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SitePropagator> PropagatorField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
|
||||
@ -209,31 +257,34 @@ namespace QCD {
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour four spinors with colour index, 5d redblack
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
|
||||
template<class S,int Nrepresentation=Nc, class Options=CoeffReal>
|
||||
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
||||
public:
|
||||
|
||||
static const int Dimension = Nrepresentation;
|
||||
const bool LsVectorised=true;
|
||||
typedef _Coeff_t Coeff_t;
|
||||
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
static const int Dimension = Nrepresentation;
|
||||
static const bool LsVectorised=true;
|
||||
static const int Nhcs = Options::Nhcs;
|
||||
|
||||
typedef typename Options::_Coeff_t Coeff_t;
|
||||
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
|
||||
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
||||
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Nrepresentation>, Ns> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
||||
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhcs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
|
||||
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
|
||||
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplPropagator<Simd> SitePropagator;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SitePropagator> PropagatorField;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplPropagator<Simd> SitePropagator;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SitePropagator> PropagatorField;
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Make the doubled gauge field a *scalar*
|
||||
@ -241,9 +292,9 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
|
||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
||||
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
||||
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
|
||||
@ -311,35 +362,37 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class S, int Nrepresentation,class _Coeff_t = RealD>
|
||||
template <class S, int Nrepresentation, class Options=CoeffReal>
|
||||
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
||||
public:
|
||||
|
||||
static const int Dimension = Nrepresentation;
|
||||
static const int Nhcs = Options::Nhcs;
|
||||
static const bool LsVectorised=false;
|
||||
|
||||
const bool LsVectorised=false;
|
||||
|
||||
typedef _Coeff_t Coeff_t;
|
||||
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
typedef typename Options::_Coeff_t Coeff_t;
|
||||
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
|
||||
|
||||
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
|
||||
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Nrepresentation>, Ns>, Ngp >;
|
||||
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
|
||||
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
|
||||
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Nrepresentation>, Ns>, Ngp>;
|
||||
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
|
||||
template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhcs>, Ngp>;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplPropagator<Simd> SitePropagator;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplPropagator<Simd> SitePropagator;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
|
||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SitePropagator> PropagatorField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
|
||||
typedef GparityWilsonImplParams ImplParams;
|
||||
@ -356,8 +409,8 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
|
||||
typedef SiteHalfSpinor vobj;
|
||||
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||
typedef SiteHalfSpinor vobj;
|
||||
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||
|
||||
vobj vtmp;
|
||||
sobj stmp;
|
||||
@ -475,7 +528,6 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
|
||||
|
||||
// DhopDir provides U or Uconj depending on coor/flavour.
|
||||
@ -508,23 +560,22 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour one component spinors with colour index
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
template <class S, class Representation = FundamentalRepresentation >
|
||||
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour one component spinors with colour index
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
template <class S, class Representation = FundamentalRepresentation >
|
||||
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||
|
||||
public:
|
||||
|
||||
typedef RealD _Coeff_t ;
|
||||
static const int Dimension = Representation::Dimension;
|
||||
static const bool LsVectorised=false;
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
|
||||
//Necessary?
|
||||
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||
|
||||
const bool LsVectorised=false;
|
||||
typedef _Coeff_t Coeff_t;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
@ -641,8 +692,6 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour one component spinors with colour index. 5d vec
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@ -651,16 +700,14 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
|
||||
public:
|
||||
|
||||
typedef RealD _Coeff_t ;
|
||||
static const int Dimension = Representation::Dimension;
|
||||
static const bool LsVectorised=true;
|
||||
typedef RealD Coeff_t ;
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
|
||||
//Necessary?
|
||||
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||
|
||||
const bool LsVectorised=true;
|
||||
|
||||
typedef _Coeff_t Coeff_t;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
@ -823,43 +870,61 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
}
|
||||
};
|
||||
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
|
||||
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRH; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDH; // Double
|
||||
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
|
||||
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRH; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDH; // Double
|
||||
|
||||
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
|
||||
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, AdjointRepresentation, CoeffReal > WilsonAdjImplD; // Double
|
||||
|
||||
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
|
||||
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplD; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffReal> DomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffReal> DomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffReal> DomainWallVec5dImplD; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffRealHalfComms> DomainWallVec5dImplRH; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffRealHalfComms> DomainWallVec5dImplDH; // Double
|
||||
|
||||
typedef GparityWilsonImpl<vComplex , Nc> GparityWilsonImplR; // Real.. whichever prec
|
||||
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
|
||||
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplex> ZDomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplex> ZDomainWallVec5dImplD; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplRH; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplDH; // Double
|
||||
|
||||
typedef GparityWilsonImpl<vComplex , Nc,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
|
||||
typedef GparityWilsonImpl<vComplexF, Nc,CoeffReal> GparityWilsonImplF; // Float
|
||||
typedef GparityWilsonImpl<vComplexD, Nc,CoeffReal> GparityWilsonImplD; // Double
|
||||
|
||||
typedef GparityWilsonImpl<vComplex , Nc,CoeffRealHalfComms> GparityWilsonImplRH; // Real.. whichever prec
|
||||
typedef GparityWilsonImpl<vComplexF, Nc,CoeffRealHalfComms> GparityWilsonImplFH; // Float
|
||||
typedef GparityWilsonImpl<vComplexD, Nc,CoeffRealHalfComms> GparityWilsonImplDH; // Double
|
||||
|
||||
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
|
||||
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
|
||||
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
|
||||
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
|
||||
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
|
||||
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
|
||||
|
||||
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
|
||||
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
|
||||
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
|
||||
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
|
||||
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
|
||||
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
|
||||
|
||||
}}
|
||||
|
||||
|
@ -33,228 +33,292 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonCompressor {
|
||||
public:
|
||||
int mu;
|
||||
int dag;
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// optimised versions supporting half precision too
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
WilsonCompressor(int _dag){
|
||||
mu=0;
|
||||
dag=_dag;
|
||||
assert((dag==0)||(dag==1));
|
||||
}
|
||||
void Point(int p) {
|
||||
mu=p;
|
||||
};
|
||||
template<class _HCspinor,class _Hspinor,class _Spinor, class projector,typename SFINAE = void >
|
||||
class WilsonCompressorTemplate;
|
||||
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
int mudag=mu;
|
||||
if (!dag) {
|
||||
mudag=(mu+Nd)%(2*Nd);
|
||||
}
|
||||
switch(mudag) {
|
||||
case Xp:
|
||||
spProjXp(ret,in);
|
||||
break;
|
||||
case Yp:
|
||||
spProjYp(ret,in);
|
||||
break;
|
||||
case Zp:
|
||||
spProjZp(ret,in);
|
||||
break;
|
||||
case Tp:
|
||||
spProjTp(ret,in);
|
||||
break;
|
||||
case Xm:
|
||||
spProjXm(ret,in);
|
||||
break;
|
||||
case Ym:
|
||||
spProjYm(ret,in);
|
||||
break;
|
||||
case Zm:
|
||||
spProjZm(ret,in);
|
||||
break;
|
||||
case Tm:
|
||||
spProjTm(ret,in);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////
|
||||
// optimised versions
|
||||
/////////////////////////
|
||||
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
|
||||
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
|
||||
typename std::enable_if<std::is_same<_HCspinor,_Hspinor>::value>::type >
|
||||
{
|
||||
public:
|
||||
|
||||
int mu,dag;
|
||||
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonXpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjXp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonYpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjYp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonZpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjZp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonTpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjTp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
void Point(int p) { mu=p; };
|
||||
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonXmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjXm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonYmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjYm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonZmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjZm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonTmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjTm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
WilsonCompressorTemplate(int _dag=0){
|
||||
dag = _dag;
|
||||
}
|
||||
|
||||
// Fast comms buffer manipulation which should inline right through (avoid direction
|
||||
// dependent logic that prevents inlining
|
||||
template<class vobj,class cobj>
|
||||
class WilsonStencil : public CartesianStencil<vobj,cobj> {
|
||||
public:
|
||||
typedef _Spinor SiteSpinor;
|
||||
typedef _Hspinor SiteHalfSpinor;
|
||||
typedef _HCspinor SiteHalfCommSpinor;
|
||||
typedef typename SiteHalfCommSpinor::vector_type vComplexLow;
|
||||
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
|
||||
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
|
||||
|
||||
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
|
||||
inline int CommDatumSize(void) {
|
||||
return sizeof(SiteHalfCommSpinor);
|
||||
}
|
||||
|
||||
WilsonStencil(GridBase *grid,
|
||||
/*****************************************************/
|
||||
/* Compress includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
|
||||
projector::Proj(buf[o],in,mu,dag);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Exchange includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Exchange(SiteHalfSpinor *mp,
|
||||
SiteHalfSpinor *vp0,
|
||||
SiteHalfSpinor *vp1,
|
||||
Integer type,Integer o){
|
||||
exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Have a decompression step if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Decompress(SiteHalfSpinor *out,
|
||||
SiteHalfSpinor *in, Integer o) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress Exchange */
|
||||
/*****************************************************/
|
||||
inline void CompressExchange(SiteHalfSpinor *out0,
|
||||
SiteHalfSpinor *out1,
|
||||
const SiteSpinor *in,
|
||||
Integer j,Integer k, Integer m,Integer type){
|
||||
SiteHalfSpinor temp1, temp2,temp3,temp4;
|
||||
projector::Proj(temp1,in[k],mu,dag);
|
||||
projector::Proj(temp2,in[m],mu,dag);
|
||||
exchange(out0[j],out1[j],temp1,temp2,type);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Pass the info to the stencil */
|
||||
/*****************************************************/
|
||||
inline bool DecompressionStep(void) { return false; }
|
||||
|
||||
};
|
||||
|
||||
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
|
||||
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
|
||||
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
|
||||
{
|
||||
public:
|
||||
|
||||
int mu,dag;
|
||||
|
||||
void Point(int p) { mu=p; };
|
||||
|
||||
WilsonCompressorTemplate(int _dag=0){
|
||||
dag = _dag;
|
||||
}
|
||||
|
||||
typedef _Spinor SiteSpinor;
|
||||
typedef _Hspinor SiteHalfSpinor;
|
||||
typedef _HCspinor SiteHalfCommSpinor;
|
||||
typedef typename SiteHalfCommSpinor::vector_type vComplexLow;
|
||||
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
|
||||
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
|
||||
|
||||
inline int CommDatumSize(void) {
|
||||
return sizeof(SiteHalfCommSpinor);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
|
||||
SiteHalfSpinor hsp;
|
||||
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
|
||||
projector::Proj(hsp,in,mu,dag);
|
||||
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Exchange includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Exchange(SiteHalfSpinor *mp,
|
||||
SiteHalfSpinor *vp0,
|
||||
SiteHalfSpinor *vp1,
|
||||
Integer type,Integer o){
|
||||
SiteHalfSpinor vt0,vt1;
|
||||
SiteHalfCommSpinor *vpp0 = (SiteHalfCommSpinor *)vp0;
|
||||
SiteHalfCommSpinor *vpp1 = (SiteHalfCommSpinor *)vp1;
|
||||
precisionChange((vComplexHigh *)&vt0,(vComplexLow *)&vpp0[o],Nw);
|
||||
precisionChange((vComplexHigh *)&vt1,(vComplexLow *)&vpp1[o],Nw);
|
||||
exchange(mp[2*o],mp[2*o+1],vt0,vt1,type);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Have a decompression step if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Decompress(SiteHalfSpinor *out,
|
||||
SiteHalfSpinor *in, Integer o){
|
||||
SiteHalfCommSpinor *hin=(SiteHalfCommSpinor *)in;
|
||||
precisionChange((vComplexHigh *)&out[o],(vComplexLow *)&hin[o],Nw);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress Exchange */
|
||||
/*****************************************************/
|
||||
inline void CompressExchange(SiteHalfSpinor *out0,
|
||||
SiteHalfSpinor *out1,
|
||||
const SiteSpinor *in,
|
||||
Integer j,Integer k, Integer m,Integer type){
|
||||
SiteHalfSpinor temp1, temp2,temp3,temp4;
|
||||
SiteHalfCommSpinor *hout0 = (SiteHalfCommSpinor *)out0;
|
||||
SiteHalfCommSpinor *hout1 = (SiteHalfCommSpinor *)out1;
|
||||
projector::Proj(temp1,in[k],mu,dag);
|
||||
projector::Proj(temp2,in[m],mu,dag);
|
||||
exchange(temp3,temp4,temp1,temp2,type);
|
||||
precisionChange((vComplexLow *)&hout0[j],(vComplexHigh *)&temp3,Nw);
|
||||
precisionChange((vComplexLow *)&hout1[j],(vComplexHigh *)&temp4,Nw);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Pass the info to the stencil */
|
||||
/*****************************************************/
|
||||
inline bool DecompressionStep(void) { return true; }
|
||||
|
||||
};
|
||||
|
||||
#define DECLARE_PROJ(Projector,Compressor,spProj) \
|
||||
class Projector { \
|
||||
public: \
|
||||
template<class hsp,class fsp> \
|
||||
static void Proj(hsp &result,const fsp &in,int mu,int dag){ \
|
||||
spProj(result,in); \
|
||||
} \
|
||||
}; \
|
||||
template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
|
||||
|
||||
DECLARE_PROJ(WilsonXpProjector,WilsonXpCompressor,spProjXp);
|
||||
DECLARE_PROJ(WilsonYpProjector,WilsonYpCompressor,spProjYp);
|
||||
DECLARE_PROJ(WilsonZpProjector,WilsonZpCompressor,spProjZp);
|
||||
DECLARE_PROJ(WilsonTpProjector,WilsonTpCompressor,spProjTp);
|
||||
DECLARE_PROJ(WilsonXmProjector,WilsonXmCompressor,spProjXm);
|
||||
DECLARE_PROJ(WilsonYmProjector,WilsonYmCompressor,spProjYm);
|
||||
DECLARE_PROJ(WilsonZmProjector,WilsonZmCompressor,spProjZm);
|
||||
DECLARE_PROJ(WilsonTmProjector,WilsonTmCompressor,spProjTm);
|
||||
|
||||
class WilsonProjector {
|
||||
public:
|
||||
template<class hsp,class fsp>
|
||||
static void Proj(hsp &result,const fsp &in,int mu,int dag){
|
||||
int mudag=dag? mu : (mu+Nd)%(2*Nd);
|
||||
switch(mudag) {
|
||||
case Xp: spProjXp(result,in); break;
|
||||
case Yp: spProjYp(result,in); break;
|
||||
case Zp: spProjZp(result,in); break;
|
||||
case Tp: spProjTp(result,in); break;
|
||||
case Xm: spProjXm(result,in); break;
|
||||
case Ym: spProjYm(result,in); break;
|
||||
case Zm: spProjZm(result,in); break;
|
||||
case Tm: spProjTm(result,in); break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
}
|
||||
};
|
||||
template<typename HCS,typename HS,typename S> using WilsonCompressor = WilsonCompressorTemplate<HCS,HS,S,WilsonProjector>;
|
||||
|
||||
// Fast comms buffer manipulation which should inline right through (avoid direction
|
||||
// dependent logic that prevents inlining
|
||||
template<class vobj,class cobj>
|
||||
class WilsonStencil : public CartesianStencil<vobj,cobj> {
|
||||
public:
|
||||
|
||||
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
|
||||
|
||||
WilsonStencil(GridBase *grid,
|
||||
int npoints,
|
||||
int checkerboard,
|
||||
const std::vector<int> &directions,
|
||||
const std::vector<int> &distances) : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
|
||||
{ };
|
||||
const std::vector<int> &distances)
|
||||
: CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
|
||||
{ /*Do nothing*/ };
|
||||
|
||||
template < class compressor>
|
||||
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||
HaloExchangeOptGather(source,compress);
|
||||
this->CommunicateBegin(reqs);
|
||||
this->calls++;
|
||||
this->CommunicateComplete(reqs);
|
||||
this->CommsMerge();
|
||||
}
|
||||
template < class compressor>
|
||||
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||
this->HaloExchangeOptGather(source,compress);
|
||||
this->CommunicateBegin(reqs);
|
||||
this->CommunicateComplete(reqs);
|
||||
this->CommsMerge(compress);
|
||||
}
|
||||
|
||||
template <class compressor>
|
||||
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
this->Prepare();
|
||||
this->HaloGatherOpt(source,compress);
|
||||
}
|
||||
|
||||
template < class compressor>
|
||||
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
this->calls++;
|
||||
this->Mergers.resize(0);
|
||||
this->Packets.resize(0);
|
||||
this->HaloGatherOpt(source,compress);
|
||||
}
|
||||
template <class compressor>
|
||||
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
// Strategy. Inherit types from Compressor.
|
||||
// Use types to select the write direction by directon compressor
|
||||
typedef typename compressor::SiteSpinor SiteSpinor;
|
||||
typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
|
||||
typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
|
||||
|
||||
this->_grid->StencilBarrier();
|
||||
|
||||
template < class compressor>
|
||||
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
this->_grid->StencilBarrier();
|
||||
// conformable(source._grid,_grid);
|
||||
assert(source._grid==this->_grid);
|
||||
this->halogtime-=usecond();
|
||||
assert(source._grid==this->_grid);
|
||||
this->halogtime-=usecond();
|
||||
|
||||
this->u_comm_offset=0;
|
||||
|
||||
this->u_comm_offset=0;
|
||||
|
||||
int dag = compress.dag;
|
||||
|
||||
WilsonXpCompressor<cobj,vobj> XpCompress;
|
||||
WilsonYpCompressor<cobj,vobj> YpCompress;
|
||||
WilsonZpCompressor<cobj,vobj> ZpCompress;
|
||||
WilsonTpCompressor<cobj,vobj> TpCompress;
|
||||
WilsonXmCompressor<cobj,vobj> XmCompress;
|
||||
WilsonYmCompressor<cobj,vobj> YmCompress;
|
||||
WilsonZmCompressor<cobj,vobj> ZmCompress;
|
||||
WilsonTmCompressor<cobj,vobj> TmCompress;
|
||||
WilsonXpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XpCompress;
|
||||
WilsonYpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YpCompress;
|
||||
WilsonZpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZpCompress;
|
||||
WilsonTpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TpCompress;
|
||||
WilsonXmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XmCompress;
|
||||
WilsonYmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YmCompress;
|
||||
WilsonZmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZmCompress;
|
||||
WilsonTmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TmCompress;
|
||||
|
||||
// Gather all comms buffers
|
||||
// for(int point = 0 ; point < _npoints; point++) {
|
||||
// compress.Point(point);
|
||||
// HaloGatherDir(source,compress,point,face_idx);
|
||||
// }
|
||||
int face_idx=0;
|
||||
if ( dag ) {
|
||||
// std::cout << " Optimised Dagger compress " <<std::endl;
|
||||
this->HaloGatherDir(source,XpCompress,Xp,face_idx);
|
||||
this->HaloGatherDir(source,YpCompress,Yp,face_idx);
|
||||
this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
|
||||
this->HaloGatherDir(source,TpCompress,Tp,face_idx);
|
||||
this->HaloGatherDir(source,XmCompress,Xm,face_idx);
|
||||
this->HaloGatherDir(source,YmCompress,Ym,face_idx);
|
||||
this->HaloGatherDir(source,ZmCompress,Zm,face_idx);
|
||||
this->HaloGatherDir(source,TmCompress,Tm,face_idx);
|
||||
} else {
|
||||
this->HaloGatherDir(source,XmCompress,Xp,face_idx);
|
||||
this->HaloGatherDir(source,YmCompress,Yp,face_idx);
|
||||
this->HaloGatherDir(source,ZmCompress,Zp,face_idx);
|
||||
this->HaloGatherDir(source,TmCompress,Tp,face_idx);
|
||||
this->HaloGatherDir(source,XpCompress,Xm,face_idx);
|
||||
this->HaloGatherDir(source,YpCompress,Ym,face_idx);
|
||||
this->HaloGatherDir(source,ZpCompress,Zm,face_idx);
|
||||
this->HaloGatherDir(source,TpCompress,Tm,face_idx);
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
this->halogtime+=usecond();
|
||||
int dag = compress.dag;
|
||||
int face_idx=0;
|
||||
if ( dag ) {
|
||||
// std::cout << " Optimised Dagger compress " <<std::endl;
|
||||
this->HaloGatherDir(source,XpCompress,Xp,face_idx);
|
||||
this->HaloGatherDir(source,YpCompress,Yp,face_idx);
|
||||
this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
|
||||
this->HaloGatherDir(source,TpCompress,Tp,face_idx);
|
||||
this->HaloGatherDir(source,XmCompress,Xm,face_idx);
|
||||
this->HaloGatherDir(source,YmCompress,Ym,face_idx);
|
||||
this->HaloGatherDir(source,ZmCompress,Zm,face_idx);
|
||||
this->HaloGatherDir(source,TmCompress,Tm,face_idx);
|
||||
} else {
|
||||
this->HaloGatherDir(source,XmCompress,Xp,face_idx);
|
||||
this->HaloGatherDir(source,YmCompress,Yp,face_idx);
|
||||
this->HaloGatherDir(source,ZmCompress,Zp,face_idx);
|
||||
this->HaloGatherDir(source,TmCompress,Tp,face_idx);
|
||||
this->HaloGatherDir(source,XpCompress,Xm,face_idx);
|
||||
this->HaloGatherDir(source,YpCompress,Ym,face_idx);
|
||||
this->HaloGatherDir(source,ZpCompress,Zm,face_idx);
|
||||
this->HaloGatherDir(source,TpCompress,Tm,face_idx);
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
this->halogtime+=usecond();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
}} // namespace close
|
||||
#endif
|
||||
|
@ -439,7 +439,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
}
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMerge();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
#pragma omp parallel
|
||||
|
Reference in New Issue
Block a user