mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-13 01:05:36 +00:00
Working half precision comms
This commit is contained in:
parent
4a340aa5ca
commit
fc4ab9ccd5
@ -152,9 +152,6 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
RealD NP = UGrid->_Nprocessors;
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Creating action operator " << std::endl;
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
|
||||||
|
|
||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
@ -168,6 +165,7 @@ int main (int argc, char ** argv)
|
|||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
int ncall =1000;
|
int ncall =1000;
|
||||||
if (1) {
|
if (1) {
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
@ -206,6 +204,33 @@ int main (int argc, char ** argv)
|
|||||||
Dw.Report();
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
if (1) {
|
||||||
|
FGrid->Barrier();
|
||||||
|
DwH.ZeroCounters();
|
||||||
|
DwH.Dhop(src,result,0);
|
||||||
|
double t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
__SSC_START;
|
||||||
|
DwH.Dhop(src,result,0);
|
||||||
|
__SSC_STOP;
|
||||||
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
err = ref-result;
|
||||||
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
|
||||||
|
assert (norm2(err)< 1.0e-3 );
|
||||||
|
DwH.Report();
|
||||||
|
}
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -237,6 +237,13 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
|
|||||||
INSTANTIATE_DPERP(GparityWilsonImplD);
|
INSTANTIATE_DPERP(GparityWilsonImplD);
|
||||||
INSTANTIATE_DPERP(ZWilsonImplF);
|
INSTANTIATE_DPERP(ZWilsonImplF);
|
||||||
INSTANTIATE_DPERP(ZWilsonImplD);
|
INSTANTIATE_DPERP(ZWilsonImplD);
|
||||||
|
|
||||||
|
INSTANTIATE_DPERP(WilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(WilsonImplDF);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplDF);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplDF);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -137,6 +137,20 @@ template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &ps
|
|||||||
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
template void CayleyFermion5D<ZWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<ZWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
template void CayleyFermion5D<ZWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<ZWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplDF);
|
||||||
|
INSTANTIATE_DPERP(WilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(WilsonImplDF);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplDF);
|
||||||
|
|
||||||
|
template void CayleyFermion5D<GparityWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<GparityWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<WilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<WilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -37,7 +37,6 @@ namespace Grid {
|
|||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||||
|
|
||||||
// Pminus fowards
|
// Pminus fowards
|
||||||
// Pplus backwards
|
// Pplus backwards
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -152,6 +151,13 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
|
|||||||
INSTANTIATE_DPERP(GparityWilsonImplD);
|
INSTANTIATE_DPERP(GparityWilsonImplD);
|
||||||
INSTANTIATE_DPERP(ZWilsonImplF);
|
INSTANTIATE_DPERP(ZWilsonImplF);
|
||||||
INSTANTIATE_DPERP(ZWilsonImplD);
|
INSTANTIATE_DPERP(ZWilsonImplD);
|
||||||
|
|
||||||
|
INSTANTIATE_DPERP(WilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(WilsonImplDF);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplDF);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplFH);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplDF);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -808,10 +808,21 @@ INSTANTIATE_DPERP(DomainWallVec5dImplF);
|
|||||||
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
|
||||||
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
|
||||||
|
|
||||||
|
INSTANTIATE_DPERP(DomainWallVec5dImplDF);
|
||||||
|
INSTANTIATE_DPERP(DomainWallVec5dImplFH);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplDF);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplFH);
|
||||||
|
|
||||||
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
template void CayleyFermion5D<DomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<DomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -89,6 +89,10 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
|
|||||||
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||||
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonImplRL> WilsonFermionRL;
|
||||||
|
typedef WilsonFermion<WilsonImplFH> WilsonFermionFH;
|
||||||
|
typedef WilsonFermion<WilsonImplDF> WilsonFermionDF;
|
||||||
|
|
||||||
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
|
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
|
||||||
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
|
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
|
||||||
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
|
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
|
||||||
@ -105,27 +109,50 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
|
|||||||
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
|
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
|
||||||
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
||||||
|
|
||||||
|
typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
|
||||||
|
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
|
||||||
|
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
|
||||||
|
|
||||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
||||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
||||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
||||||
|
|
||||||
|
typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
|
||||||
|
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
|
||||||
|
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
|
||||||
|
|
||||||
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
||||||
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
||||||
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
||||||
|
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
|
||||||
|
|
||||||
// Ls vectorised
|
// Ls vectorised
|
||||||
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR;
|
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR;
|
||||||
typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF;
|
typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF;
|
||||||
typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD;
|
typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD;
|
||||||
|
|
||||||
|
typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
|
||||||
|
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
|
||||||
|
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
|
||||||
|
|
||||||
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
|
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
|
||||||
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
|
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
|
||||||
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
|
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
|
||||||
|
|
||||||
|
typedef MobiusFermion<DomainWallVec5dImplRL> MobiusFermionVec5dRL;
|
||||||
|
typedef MobiusFermion<DomainWallVec5dImplFH> MobiusFermionVec5dFH;
|
||||||
|
typedef MobiusFermion<DomainWallVec5dImplDF> MobiusFermionVec5dDF;
|
||||||
|
|
||||||
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
|
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
|
||||||
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
|
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
|
||||||
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
|
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
|
||||||
|
|
||||||
|
typedef ZMobiusFermion<ZDomainWallVec5dImplRL> ZMobiusFermionVec5dRL;
|
||||||
|
typedef ZMobiusFermion<ZDomainWallVec5dImplFH> ZMobiusFermionVec5dFH;
|
||||||
|
typedef ZMobiusFermion<ZDomainWallVec5dImplDF> ZMobiusFermionVec5dDF;
|
||||||
|
|
||||||
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
||||||
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
||||||
@ -166,17 +193,35 @@ typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplD> OverlapWilsonP
|
|||||||
typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
|
typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
|
||||||
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
|
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
|
||||||
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
|
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
|
||||||
|
|
||||||
|
typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL;
|
||||||
|
typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH;
|
||||||
|
typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF;
|
||||||
|
|
||||||
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
|
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
|
||||||
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
|
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
|
||||||
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
|
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
|
||||||
|
|
||||||
|
typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
|
||||||
|
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
|
||||||
|
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
|
||||||
|
|
||||||
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
|
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
|
||||||
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
|
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
|
||||||
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
|
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
|
||||||
|
|
||||||
|
typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL;
|
||||||
|
typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH;
|
||||||
|
typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF;
|
||||||
|
|
||||||
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
|
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
|
||||||
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
|
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
|
||||||
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
|
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
|
||||||
|
|
||||||
|
typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
|
||||||
|
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
|
||||||
|
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
|
||||||
|
|
||||||
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
|
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
|
||||||
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
||||||
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
|
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
|
||||||
|
@ -55,7 +55,14 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
|||||||
template class A<ZWilsonImplF>; \
|
template class A<ZWilsonImplF>; \
|
||||||
template class A<ZWilsonImplD>; \
|
template class A<ZWilsonImplD>; \
|
||||||
template class A<GparityWilsonImplF>; \
|
template class A<GparityWilsonImplF>; \
|
||||||
template class A<GparityWilsonImplD>;
|
template class A<GparityWilsonImplD>; \
|
||||||
|
template class A<WilsonImplFH>; \
|
||||||
|
template class A<WilsonImplDF>; \
|
||||||
|
template class A<ZWilsonImplFH>; \
|
||||||
|
template class A<ZWilsonImplDF>; \
|
||||||
|
template class A<GparityWilsonImplFH>; \
|
||||||
|
template class A<GparityWilsonImplDF>;
|
||||||
|
|
||||||
|
|
||||||
#define AdjointFermOpTemplateInstantiate(A) \
|
#define AdjointFermOpTemplateInstantiate(A) \
|
||||||
template class A<WilsonAdjImplF>; \
|
template class A<WilsonAdjImplF>; \
|
||||||
@ -69,7 +76,11 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
|||||||
template class A<DomainWallVec5dImplF>; \
|
template class A<DomainWallVec5dImplF>; \
|
||||||
template class A<DomainWallVec5dImplD>; \
|
template class A<DomainWallVec5dImplD>; \
|
||||||
template class A<ZDomainWallVec5dImplF>; \
|
template class A<ZDomainWallVec5dImplF>; \
|
||||||
template class A<ZDomainWallVec5dImplD>;
|
template class A<ZDomainWallVec5dImplD>; \
|
||||||
|
template class A<DomainWallVec5dImplFH>; \
|
||||||
|
template class A<DomainWallVec5dImplDF>; \
|
||||||
|
template class A<ZDomainWallVec5dImplFH>; \
|
||||||
|
template class A<ZDomainWallVec5dImplDF>;
|
||||||
|
|
||||||
#define FermOpTemplateInstantiate(A) \
|
#define FermOpTemplateInstantiate(A) \
|
||||||
FermOp4dVecTemplateInstantiate(A) \
|
FermOp4dVecTemplateInstantiate(A) \
|
||||||
|
@ -874,17 +874,17 @@ typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR
|
|||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRH; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDH; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRH; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDH; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
|
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
|
||||||
@ -898,25 +898,25 @@ typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffReal> DomainWallVec5dImplR; // Re
|
|||||||
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffReal> DomainWallVec5dImplF; // Float
|
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffReal> DomainWallVec5dImplF; // Float
|
||||||
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffReal> DomainWallVec5dImplD; // Double
|
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffReal> DomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffRealHalfComms> DomainWallVec5dImplRH; // Real.. whichever prec
|
typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffRealHalfComms> DomainWallVec5dImplRL; // Real.. whichever prec
|
||||||
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
|
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
|
||||||
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffRealHalfComms> DomainWallVec5dImplDH; // Double
|
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffRealHalfComms> DomainWallVec5dImplDF; // Double
|
||||||
|
|
||||||
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
|
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||||
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplex> ZDomainWallVec5dImplF; // Float
|
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplex> ZDomainWallVec5dImplF; // Float
|
||||||
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplex> ZDomainWallVec5dImplD; // Double
|
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplex> ZDomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplRH; // Real.. whichever prec
|
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplRL; // Real.. whichever prec
|
||||||
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
|
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
|
||||||
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplDH; // Double
|
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplDF; // Double
|
||||||
|
|
||||||
typedef GparityWilsonImpl<vComplex , Nc,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
|
typedef GparityWilsonImpl<vComplex , Nc,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
|
||||||
typedef GparityWilsonImpl<vComplexF, Nc,CoeffReal> GparityWilsonImplF; // Float
|
typedef GparityWilsonImpl<vComplexF, Nc,CoeffReal> GparityWilsonImplF; // Float
|
||||||
typedef GparityWilsonImpl<vComplexD, Nc,CoeffReal> GparityWilsonImplD; // Double
|
typedef GparityWilsonImpl<vComplexD, Nc,CoeffReal> GparityWilsonImplD; // Double
|
||||||
|
|
||||||
typedef GparityWilsonImpl<vComplex , Nc,CoeffRealHalfComms> GparityWilsonImplRH; // Real.. whichever prec
|
typedef GparityWilsonImpl<vComplex , Nc,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
|
||||||
typedef GparityWilsonImpl<vComplexF, Nc,CoeffRealHalfComms> GparityWilsonImplFH; // Float
|
typedef GparityWilsonImpl<vComplexF, Nc,CoeffRealHalfComms> GparityWilsonImplFH; // Float
|
||||||
typedef GparityWilsonImpl<vComplexD, Nc,CoeffRealHalfComms> GparityWilsonImplDH; // Double
|
typedef GparityWilsonImpl<vComplexD, Nc,CoeffRealHalfComms> GparityWilsonImplDF; // Double
|
||||||
|
|
||||||
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
|
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
|
||||||
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
|
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
|
||||||
|
@ -679,7 +679,6 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonFermion5D);
|
FermOpTemplateInstantiate(WilsonFermion5D);
|
||||||
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
||||||
|
|
||||||
|
@ -112,5 +112,16 @@ INSTANTIATE_ASM(DomainWallVec5dImplD);
|
|||||||
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
|
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
|
||||||
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
|
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
|
||||||
|
|
||||||
|
INSTANTIATE_ASM(WilsonImplFH);
|
||||||
|
INSTANTIATE_ASM(WilsonImplDF);
|
||||||
|
INSTANTIATE_ASM(ZWilsonImplFH);
|
||||||
|
INSTANTIATE_ASM(ZWilsonImplDF);
|
||||||
|
INSTANTIATE_ASM(GparityWilsonImplFH);
|
||||||
|
INSTANTIATE_ASM(GparityWilsonImplDF);
|
||||||
|
INSTANTIATE_ASM(DomainWallVec5dImplFH);
|
||||||
|
INSTANTIATE_ASM(DomainWallVec5dImplDF);
|
||||||
|
INSTANTIATE_ASM(ZDomainWallVec5dImplFH);
|
||||||
|
INSTANTIATE_ASM(ZDomainWallVec5dImplDF);
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
@ -829,6 +829,36 @@ WilsonKernels<GparityWilsonImplD>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> void
|
||||||
|
WilsonKernels<GparityWilsonImplFH>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||||
|
SiteHalfSpinor *buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> void
|
||||||
|
WilsonKernels<GparityWilsonImplFH>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||||
|
SiteHalfSpinor *buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> void
|
||||||
|
WilsonKernels<GparityWilsonImplDF>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> void
|
||||||
|
WilsonKernels<GparityWilsonImplDF>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////// Wilson ; uses this implementation /////////////////////
|
////////////// Wilson ; uses this implementation /////////////////////
|
||||||
@ -850,5 +880,15 @@ INSTANTIATE_THEM(DomainWallVec5dImplF);
|
|||||||
INSTANTIATE_THEM(DomainWallVec5dImplD);
|
INSTANTIATE_THEM(DomainWallVec5dImplD);
|
||||||
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
|
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
|
||||||
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
|
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
|
||||||
|
INSTANTIATE_THEM(WilsonImplFH);
|
||||||
|
INSTANTIATE_THEM(WilsonImplDF);
|
||||||
|
INSTANTIATE_THEM(ZWilsonImplFH);
|
||||||
|
INSTANTIATE_THEM(ZWilsonImplDF);
|
||||||
|
INSTANTIATE_THEM(GparityWilsonImplFH);
|
||||||
|
INSTANTIATE_THEM(GparityWilsonImplDF);
|
||||||
|
INSTANTIATE_THEM(DomainWallVec5dImplFH);
|
||||||
|
INSTANTIATE_THEM(DomainWallVec5dImplDF);
|
||||||
|
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
|
||||||
|
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -901,8 +901,9 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
|
|||||||
int reduced_buffer_size = buffer_size;
|
int reduced_buffer_size = buffer_size;
|
||||||
if (cbmask != 0x3) reduced_buffer_size=buffer_size>>1;
|
if (cbmask != 0x3) reduced_buffer_size=buffer_size>>1;
|
||||||
|
|
||||||
int bytes = (reduced_buffer_size*sizeof(cobj))/simd_layout;
|
int datum_bytes = compress.CommDatumSize();
|
||||||
assert(bytes*simd_layout == reduced_buffer_size*sizeof(cobj));
|
int bytes = (reduced_buffer_size*datum_bytes)/simd_layout;
|
||||||
|
assert(bytes*simd_layout == reduced_buffer_size*datum_bytes);
|
||||||
|
|
||||||
std::vector<cobj *> rpointers(maxl);
|
std::vector<cobj *> rpointers(maxl);
|
||||||
std::vector<cobj *> spointers(maxl);
|
std::vector<cobj *> spointers(maxl);
|
||||||
|
@ -142,6 +142,17 @@ namespace Grid {
|
|||||||
typedef vRealD Realified;
|
typedef vRealD Realified;
|
||||||
enum { TensorLevel = 0 };
|
enum { TensorLevel = 0 };
|
||||||
};
|
};
|
||||||
|
template<> class GridTypeMapper<vComplexH> {
|
||||||
|
public:
|
||||||
|
typedef ComplexF scalar_type;
|
||||||
|
typedef vComplexH vector_type;
|
||||||
|
typedef vComplexD vector_typeD;
|
||||||
|
typedef vComplexH tensor_reduced;
|
||||||
|
typedef ComplexF scalar_object;
|
||||||
|
typedef vComplexH Complexified;
|
||||||
|
typedef vRealH Realified;
|
||||||
|
enum { TensorLevel = 0 };
|
||||||
|
};
|
||||||
template<> class GridTypeMapper<vComplexF> {
|
template<> class GridTypeMapper<vComplexF> {
|
||||||
public:
|
public:
|
||||||
typedef ComplexF scalar_type;
|
typedef ComplexF scalar_type;
|
||||||
|
Loading…
Reference in New Issue
Block a user