mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Merge branch 'develop' into feature/hmc_generalise
This commit is contained in:
		@@ -186,10 +186,10 @@ namespace Grid {
 | 
			
		||||
      inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
 | 
			
		||||
	
 | 
			
		||||
	int Ls=Btilde._grid->_fdimensions[0];
 | 
			
		||||
	
 | 
			
		||||
	GaugeLinkField tmp(mat._grid);
 | 
			
		||||
	tmp = zero;
 | 
			
		||||
	PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
        PARALLEL_FOR_LOOP
 | 
			
		||||
	  for(int sss=0;sss<tmp._grid->oSites();sss++){
 | 
			
		||||
	    int sU=sss;
 | 
			
		||||
	    for(int s=0;s<Ls;s++){
 | 
			
		||||
@@ -198,7 +198,7 @@ namespace Grid {
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	PokeIndex<LorentzIndex>(mat,tmp,mu);
 | 
			
		||||
	
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -42,11 +42,11 @@ const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1
 | 
			
		||||
  // 5d lattice for DWF.
 | 
			
		||||
template<class Impl>
 | 
			
		||||
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
			
		||||
				       GridCartesian         &FiveDimGrid,
 | 
			
		||||
				       GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
				       GridCartesian         &FourDimGrid,
 | 
			
		||||
				       GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
				       RealD _M5,const ImplParams &p) :
 | 
			
		||||
               GridCartesian         &FiveDimGrid,
 | 
			
		||||
               GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
               GridCartesian         &FourDimGrid,
 | 
			
		||||
               GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
               RealD _M5,const ImplParams &p) :
 | 
			
		||||
  Kernels(p),
 | 
			
		||||
  _FiveDimGrid        (&FiveDimGrid),
 | 
			
		||||
  _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
 | 
			
		||||
@@ -135,10 +135,10 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
			
		||||
  /*
 | 
			
		||||
template<class Impl>
 | 
			
		||||
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
 | 
			
		||||
				       GridCartesian         &FiveDimGrid,
 | 
			
		||||
				       GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
				       GridCartesian         &FourDimGrid,
 | 
			
		||||
				       RealD _M5,const ImplParams &p) :
 | 
			
		||||
               GridCartesian         &FiveDimGrid,
 | 
			
		||||
               GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
               GridCartesian         &FourDimGrid,
 | 
			
		||||
               RealD _M5,const ImplParams &p) :
 | 
			
		||||
{
 | 
			
		||||
  int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
@@ -175,6 +175,73 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
 | 
			
		||||
}  
 | 
			
		||||
  */
 | 
			
		||||
     
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::Report(void)
 | 
			
		||||
{
 | 
			
		||||
    std::vector<int> latt = GridDefaultLatt();          
 | 
			
		||||
    RealD volume = Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
 | 
			
		||||
    RealD NP = _FourDimGrid->_Nprocessors;
 | 
			
		||||
 | 
			
		||||
  if ( DhopCalls > 0 ) {
 | 
			
		||||
    std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls     : " << DhopCalls  << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
 | 
			
		||||
              << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : "
 | 
			
		||||
              << DhopCommTime / DhopCalls << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : "
 | 
			
		||||
              << DhopComputeTime << " us" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : "
 | 
			
		||||
              << DhopComputeTime / DhopCalls << " us" << std::endl;
 | 
			
		||||
 | 
			
		||||
    RealD mflops = 1344*volume*DhopCalls/DhopComputeTime;
 | 
			
		||||
    std::cout << GridLogMessage << "Average mflops/s per call                : " << mflops << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "Average mflops/s per call per node       : " << mflops/NP << std::endl;
 | 
			
		||||
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
  if ( DerivCalls > 0 ) {
 | 
			
		||||
  std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls    : " <<DerivCalls <<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : " <<DerivComputeTime <<" us"<<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time  : " <<DerivDhopComputeTime <<" us"<<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls   : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
 | 
			
		||||
  std::cout << GridLogMessage << "Average mflops/s per call                : " << mflops << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "Average mflops/s per call per node       : " << mflops/NP << std::endl;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (DerivCalls > 0 || DhopCalls > 0){
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl;  Stencil.Report();
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl;  StencilEven.Report();
 | 
			
		||||
  std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl;  StencilOdd.Report();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::ZeroCounters(void) {
 | 
			
		||||
  DhopCalls       = 0;
 | 
			
		||||
  DhopCommTime    = 0;
 | 
			
		||||
  DhopComputeTime = 0;
 | 
			
		||||
 | 
			
		||||
  DerivCalls       = 0;
 | 
			
		||||
  DerivCommTime    = 0;
 | 
			
		||||
  DerivComputeTime = 0;
 | 
			
		||||
  DerivDhopComputeTime = 0;
 | 
			
		||||
 | 
			
		||||
  Stencil.ZeroCounters();
 | 
			
		||||
  StencilEven.ZeroCounters();
 | 
			
		||||
  StencilOdd.ZeroCounters();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
			
		||||
{
 | 
			
		||||
@@ -215,12 +282,13 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
 | 
			
		||||
					  DoubledGaugeField & U,
 | 
			
		||||
					  GaugeField &mat,
 | 
			
		||||
					  const FermionField &A,
 | 
			
		||||
					  const FermionField &B,
 | 
			
		||||
					  int dag)
 | 
			
		||||
            DoubledGaugeField & U,
 | 
			
		||||
            GaugeField &mat,
 | 
			
		||||
            const FermionField &A,
 | 
			
		||||
            const FermionField &B,
 | 
			
		||||
            int dag)
 | 
			
		||||
{
 | 
			
		||||
  DerivCalls++;
 | 
			
		||||
  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
			
		||||
 | 
			
		||||
  conformable(st._grid,A._grid);
 | 
			
		||||
@@ -231,51 +299,53 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
 | 
			
		||||
  FermionField Btilde(B._grid);
 | 
			
		||||
  FermionField Atilde(B._grid);
 | 
			
		||||
 | 
			
		||||
  DerivCommTime-=usecond();
 | 
			
		||||
  st.HaloExchange(B,compressor);
 | 
			
		||||
  DerivCommTime+=usecond();
 | 
			
		||||
 | 
			
		||||
  Atilde=A;
 | 
			
		||||
 | 
			
		||||
  for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
      
 | 
			
		||||
  DerivComputeTime-=usecond();
 | 
			
		||||
  for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Flip gamma if dag
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    int gamma = mu;
 | 
			
		||||
    if ( !dag ) gamma+= Nd;
 | 
			
		||||
    if (!dag) gamma += Nd;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////
 | 
			
		||||
    // Call the single hop
 | 
			
		||||
    ////////////////////////
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int sss=0;sss<U._grid->oSites();sss++){
 | 
			
		||||
      for(int s=0;s<Ls;s++){
 | 
			
		||||
	int sU=sss;
 | 
			
		||||
	int sF = s+Ls*sU;
 | 
			
		||||
    DerivDhopComputeTime -= usecond();
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int sss = 0; sss < U._grid->oSites(); sss++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        int sU = sss;
 | 
			
		||||
        int sF = s + Ls * sU;
 | 
			
		||||
 | 
			
		||||
	assert ( sF< B._grid->oSites());
 | 
			
		||||
	assert ( sU< U._grid->oSites());
 | 
			
		||||
        assert(sF < B._grid->oSites());
 | 
			
		||||
        assert(sU < U._grid->oSites());
 | 
			
		||||
 | 
			
		||||
	Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma);
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////
 | 
			
		||||
    // spin trace outer product
 | 
			
		||||
    ////////////////////////////
 | 
			
		||||
        Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu,
 | 
			
		||||
                                 gamma);
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////
 | 
			
		||||
        // spin trace outer product
 | 
			
		||||
        ////////////////////////////
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Impl::InsertForce5D(mat,Btilde,Atilde,mu);
 | 
			
		||||
 | 
			
		||||
    DerivDhopComputeTime += usecond();
 | 
			
		||||
    Impl::InsertForce5D(mat, Btilde, Atilde, mu);
 | 
			
		||||
  }
 | 
			
		||||
  DerivComputeTime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDeriv(      GaugeField &mat,
 | 
			
		||||
					    const FermionField &A,
 | 
			
		||||
					    const FermionField &B,
 | 
			
		||||
					    int dag)
 | 
			
		||||
              const FermionField &A,
 | 
			
		||||
              const FermionField &B,
 | 
			
		||||
              int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionGrid());  
 | 
			
		||||
  conformable(A._grid,B._grid);
 | 
			
		||||
@@ -288,9 +358,9 @@ void WilsonFermion5D<Impl>::DhopDeriv(      GaugeField &mat,
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
 | 
			
		||||
					const FermionField &A,
 | 
			
		||||
					const FermionField &B,
 | 
			
		||||
					int dag)
 | 
			
		||||
          const FermionField &A,
 | 
			
		||||
          const FermionField &B,
 | 
			
		||||
          int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionRedBlackGrid());
 | 
			
		||||
  conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
@@ -306,9 +376,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
 | 
			
		||||
				  const FermionField &A,
 | 
			
		||||
				  const FermionField &B,
 | 
			
		||||
				  int dag)
 | 
			
		||||
          const FermionField &A,
 | 
			
		||||
          const FermionField &B,
 | 
			
		||||
          int dag)
 | 
			
		||||
{
 | 
			
		||||
  conformable(A._grid,FermionRedBlackGrid());
 | 
			
		||||
  conformable(GaugeRedBlackGrid(),mat._grid);
 | 
			
		||||
@@ -323,32 +393,61 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
 | 
			
		||||
					 DoubledGaugeField & U,
 | 
			
		||||
					 const FermionField &in, FermionField &out,int dag)
 | 
			
		||||
           DoubledGaugeField & U,
 | 
			
		||||
           const FermionField &in, FermionField &out,int dag)
 | 
			
		||||
{
 | 
			
		||||
  DhopCalls++;
 | 
			
		||||
  //  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
			
		||||
  Compressor compressor(dag);
 | 
			
		||||
 | 
			
		||||
  int LLs = in._grid->_rdimensions[0];
 | 
			
		||||
  
 | 
			
		||||
  DhopCommTime-=usecond();
 | 
			
		||||
  st.HaloExchange(in,compressor);
 | 
			
		||||
  DhopCommTime+=usecond();
 | 
			
		||||
  
 | 
			
		||||
  DhopComputeTime-=usecond();
 | 
			
		||||
  // Dhop takes the 4d grid from U, and makes a 5d index for fermion
 | 
			
		||||
  if ( dag == DaggerYes ) {
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int ss=0;ss<U._grid->oSites();ss++){
 | 
			
		||||
	int sU=ss;
 | 
			
		||||
	int sF=LLs*sU;
 | 
			
		||||
	Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
 | 
			
		||||
  if (dag == DaggerYes) {
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int ss = 0; ss < U._grid->oSites(); ss++) {
 | 
			
		||||
      int sU = ss;
 | 
			
		||||
      int sF = LLs * sU;
 | 
			
		||||
      Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
 | 
			
		||||
                                   out);
 | 
			
		||||
    }
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
  } else if (stat.is_init() ) {
 | 
			
		||||
 | 
			
		||||
    int nthreads;
 | 
			
		||||
    stat.start();
 | 
			
		||||
    #pragma omp parallel
 | 
			
		||||
    {
 | 
			
		||||
    #pragma omp master
 | 
			
		||||
    nthreads = omp_get_num_threads();
 | 
			
		||||
    int mythread = omp_get_thread_num();
 | 
			
		||||
    stat.enter(mythread);
 | 
			
		||||
    #pragma omp for nowait
 | 
			
		||||
   for(int ss=0;ss<U._grid->oSites();ss++)
 | 
			
		||||
    {
 | 
			
		||||
       int sU=ss;
 | 
			
		||||
       int sF=LLs*sU;
 | 
			
		||||
       Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
 | 
			
		||||
     }
 | 
			
		||||
    stat.exit(mythread);
 | 
			
		||||
    }
 | 
			
		||||
    stat.accum(nthreads);
 | 
			
		||||
#endif
 | 
			
		||||
  } else {
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int ss=0;ss<U._grid->oSites();ss++){
 | 
			
		||||
      int sU=ss;
 | 
			
		||||
      int sF=LLs*sU;
 | 
			
		||||
      Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int ss = 0; ss < U._grid->oSites(); ss++) {
 | 
			
		||||
      int sU = ss;
 | 
			
		||||
      int sF = LLs * sU;
 | 
			
		||||
      Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
 | 
			
		||||
                                out);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  DhopComputeTime+=usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -31,6 +31,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef  GRID_QCD_WILSON_FERMION_5D_H
 | 
			
		||||
#define  GRID_QCD_WILSON_FERMION_5D_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/Stat.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
@@ -60,6 +62,18 @@ namespace Grid {
 | 
			
		||||
    public:
 | 
			
		||||
     INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
     typedef WilsonKernels<Impl> Kernels;
 | 
			
		||||
     PmuStat stat;
 | 
			
		||||
 | 
			
		||||
     void Report(void);
 | 
			
		||||
     void ZeroCounters(void);
 | 
			
		||||
     double DhopCalls;
 | 
			
		||||
     double DhopCommTime;
 | 
			
		||||
     double DhopComputeTime;
 | 
			
		||||
 | 
			
		||||
     double DerivCalls;
 | 
			
		||||
     double DerivCommTime;
 | 
			
		||||
     double DerivComputeTime;
 | 
			
		||||
     double DerivDhopComputeTime;
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      // Implement the abstract base
 | 
			
		||||
 
 | 
			
		||||
@@ -196,24 +196,11 @@ namespace Grid {
 | 
			
		||||
				    WilsonKernels(const ImplParams &p = ImplParams());
 | 
			
		||||
				  };
 | 
			
		||||
    
 | 
			
		||||
	///////////////////////////////////////////////////////////
 | 
			
		||||
	// Default to no assembler implementation
 | 
			
		||||
	///////////////////////////////////////////////////////////
 | 
			
		||||
	template<class Impl>
 | 
			
		||||
	  void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
	{
 | 
			
		||||
	  assert(0);
 | 
			
		||||
	}
 | 
			
		||||
	template<class Impl>
 | 
			
		||||
	  void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
	{
 | 
			
		||||
	  assert(0);
 | 
			
		||||
	}
 | 
			
		||||
  
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -31,9 +31,30 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
    
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    // Default to no assembler implementation
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
      void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
                             std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
                             int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    {
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
      void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
                                std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
                                int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
    {
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(AVX512) 
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
@@ -102,6 +123,27 @@ namespace Grid {
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
				    
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_ASM(A)\
 | 
			
		||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
			
		||||
                                   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
			
		||||
                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
			
		||||
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
			
		||||
                                   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
			
		||||
                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_ASM(WilsonImplF);
 | 
			
		||||
INSTANTIATE_ASM(WilsonImplD);
 | 
			
		||||
INSTANTIATE_ASM(ZWilsonImplF);
 | 
			
		||||
INSTANTIATE_ASM(ZWilsonImplD);
 | 
			
		||||
INSTANTIATE_ASM(GparityWilsonImplF);
 | 
			
		||||
INSTANTIATE_ASM(GparityWilsonImplD);
 | 
			
		||||
INSTANTIATE_ASM(DomainWallVec5dImplF);
 | 
			
		||||
INSTANTIATE_ASM(DomainWallVec5dImplD);
 | 
			
		||||
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
 | 
			
		||||
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -134,7 +134,9 @@
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Xm
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
#ifndef STREAM_STORE
 | 
			
		||||
  basep= (uint64_t) &out._odata[ss];
 | 
			
		||||
#endif
 | 
			
		||||
  //  basep= st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
@@ -229,7 +231,9 @@
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
  }
 | 
			
		||||
  base= (uint64_t) &out._odata[ss];
 | 
			
		||||
#ifndef STREAM_STORE
 | 
			
		||||
  PREFETCH_CHIMU(base);
 | 
			
		||||
#endif
 | 
			
		||||
  {
 | 
			
		||||
    MULT_2SPIN_DIR_PFTM(Tm,basep);
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -131,9 +131,11 @@ namespace Grid{
 | 
			
		||||
	Vpc.MpcDag(PhiOdd,Y);           // Y= Vdag phi
 | 
			
		||||
	X=zero;
 | 
			
		||||
	ActionSolver(Mpc,Y,X);          // X= (MdagM)^-1 Vdag phi
 | 
			
		||||
	Mpc.Mpc(X,Y);                   // Y=  Mdag^-1 Vdag phi
 | 
			
		||||
	//Mpc.Mpc(X,Y);                   // Y=  Mdag^-1 Vdag phi
 | 
			
		||||
	// Multiply by Ydag
 | 
			
		||||
	RealD action = real(innerProduct(Y,X));
 | 
			
		||||
 | 
			
		||||
	RealD action = norm2(Y);
 | 
			
		||||
	//RealD action = norm2(Y);
 | 
			
		||||
 | 
			
		||||
	// The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
 | 
			
		||||
	// Only really clover term that creates this. Leave the EE portion as a future to do to make most
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user