mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	simd in 5th dimension support
This commit is contained in:
		@@ -288,11 +288,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
  void WilsonFermion<Impl>::DhopInternal(StencilImpl & st,DoubledGaugeField & U,
 | 
					  void WilsonFermion<Impl>::DhopInternal(StencilImpl & st,DoubledGaugeField & U,
 | 
				
			||||||
					 const FermionField &in, FermionField &out,int dag) 
 | 
										 const FermionField &in, FermionField &out,int dag) 
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    if ( Impl::overlapCommsCompute () ) { 
 | 
					    DhopInternalCommsThenCompute(st,U,in,out,dag);
 | 
				
			||||||
      DhopInternalCommsOverlapCompute(st,U,in,out,dag);
 | 
					 | 
				
			||||||
    } else { 
 | 
					 | 
				
			||||||
      DhopInternalCommsThenCompute(st,U,in,out,dag);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  template<class Impl>
 | 
					  template<class Impl>
 | 
				
			||||||
  void WilsonFermion<Impl>::DhopInternalCommsThenCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
					  void WilsonFermion<Impl>::DhopInternalCommsThenCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
				
			||||||
@@ -331,15 +327,6 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
  template<class Impl>
 | 
					 | 
				
			||||||
  void WilsonFermion<Impl>::DhopInternalCommsOverlapCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
					 | 
				
			||||||
						     const FermionField &in, FermionField &out,int dag) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    assert(0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
  FermOpTemplateInstantiate(WilsonFermion);
 | 
					  FermOpTemplateInstantiate(WilsonFermion);
 | 
				
			||||||
  GparityFermOpTemplateInstantiate(WilsonFermion);
 | 
					  GparityFermOpTemplateInstantiate(WilsonFermion);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -116,9 +116,6 @@ namespace Grid {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      void DhopInternalCommsThenCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
					      void DhopInternalCommsThenCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
				
			||||||
				    const FermionField &in, FermionField &out,int dag) ;
 | 
									    const FermionField &in, FermionField &out,int dag) ;
 | 
				
			||||||
      void DhopInternalCommsOverlapCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
					 | 
				
			||||||
				    const FermionField &in, FermionField &out,int dag) ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Constructor
 | 
					      // Constructor
 | 
				
			||||||
      WilsonFermion(GaugeField &_Umu,
 | 
					      WilsonFermion(GaugeField &_Umu,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -68,10 +68,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
				
			|||||||
  // some assertions
 | 
					  // some assertions
 | 
				
			||||||
  assert(FiveDimGrid._ndimension==5);
 | 
					  assert(FiveDimGrid._ndimension==5);
 | 
				
			||||||
  assert(FourDimGrid._ndimension==4);
 | 
					  assert(FourDimGrid._ndimension==4);
 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  assert(FiveDimRedBlackGrid._ndimension==5);
 | 
					  assert(FiveDimRedBlackGrid._ndimension==5);
 | 
				
			||||||
  assert(FourDimRedBlackGrid._ndimension==4);
 | 
					  assert(FourDimRedBlackGrid._ndimension==4);
 | 
				
			||||||
 | 
					 | 
				
			||||||
  assert(FiveDimRedBlackGrid._checker_dim==1);
 | 
					  assert(FiveDimRedBlackGrid._checker_dim==1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Dimension zero of the five-d is the Ls direction
 | 
					  // Dimension zero of the five-d is the Ls direction
 | 
				
			||||||
@@ -106,11 +104,75 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
				
			|||||||
  dslashtime=0;
 | 
					  dslashtime=0;
 | 
				
			||||||
  dslash1time=0;
 | 
					  dslash1time=0;
 | 
				
			||||||
}  
 | 
					}  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					WilsonFermion5D<Impl>::WilsonFermion5D(int simd, GaugeField &_Umu,
 | 
				
			||||||
 | 
									       GridCartesian         &FiveDimGrid,
 | 
				
			||||||
 | 
									       GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
				
			||||||
 | 
									       GridCartesian         &FourDimGrid,
 | 
				
			||||||
 | 
									       GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
				
			||||||
 | 
									       RealD _M5,const ImplParams &p) :
 | 
				
			||||||
 | 
					  Kernels(p),
 | 
				
			||||||
 | 
					  _FiveDimGrid        (&FiveDimGrid),
 | 
				
			||||||
 | 
					  _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
 | 
				
			||||||
 | 
					  _FourDimGrid        (&FourDimGrid),
 | 
				
			||||||
 | 
					  _FourDimRedBlackGrid(&FourDimRedBlackGrid),
 | 
				
			||||||
 | 
					  Stencil    (_FiveDimGrid,npoint,Even,directions,displacements),
 | 
				
			||||||
 | 
					  StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
 | 
				
			||||||
 | 
					  StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
 | 
				
			||||||
 | 
					  M5(_M5),
 | 
				
			||||||
 | 
					  Umu(_FourDimGrid),
 | 
				
			||||||
 | 
					  UmuEven(_FourDimRedBlackGrid),
 | 
				
			||||||
 | 
					  UmuOdd (_FourDimRedBlackGrid),
 | 
				
			||||||
 | 
					  Lebesgue(_FourDimGrid),
 | 
				
			||||||
 | 
					  LebesgueEvenOdd(_FourDimRedBlackGrid)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int nsimd = Simd::Nsimd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // some assertions
 | 
				
			||||||
 | 
					  assert(FiveDimGrid._ndimension==5);
 | 
				
			||||||
 | 
					  assert(FiveDimRedBlackGrid._ndimension==5);
 | 
				
			||||||
 | 
					  assert(FiveDimRedBlackGrid._checker_dim==0); // Checkerboard the s-direction
 | 
				
			||||||
 | 
					  assert(FourDimGrid._ndimension==4);
 | 
				
			||||||
 | 
					  assert(FourDimRedBlackGrid._ndimension==4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Dimension zero of the five-d is the Ls direction
 | 
				
			||||||
 | 
					  Ls=FiveDimGrid._fdimensions[0];
 | 
				
			||||||
 | 
					  assert(FiveDimGrid._processors[0]         ==1);
 | 
				
			||||||
 | 
					  assert(FiveDimGrid._simd_layout[0]        ==nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
 | 
				
			||||||
 | 
					  assert(FiveDimRedBlackGrid._processors[0] ==1);
 | 
				
			||||||
 | 
					  assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Other dimensions must match the decomposition of the four-D fields 
 | 
				
			||||||
 | 
					  for(int d=0;d<4;d++){
 | 
				
			||||||
 | 
					    assert(FourDimRedBlackGrid._fdimensions[d]  ==FourDimGrid._fdimensions[d]);
 | 
				
			||||||
 | 
					    assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(FourDimRedBlackGrid._processors[d]   ==FourDimGrid._processors[d]);
 | 
				
			||||||
 | 
					    assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(FourDimGrid._simd_layout[d]=1);
 | 
				
			||||||
 | 
					    assert(FourDimRedBlackGrid._simd_layout[d]  ==1);
 | 
				
			||||||
 | 
					    assert(FourDimRedBlackGrid._simd_layout[d]  ==1);
 | 
				
			||||||
 | 
					    assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]);
 | 
				
			||||||
 | 
					    assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]);
 | 
				
			||||||
 | 
					    assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Allocate the required comms buffer
 | 
				
			||||||
 | 
					  ImportGauge(_Umu);
 | 
				
			||||||
 | 
					}  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
					void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    GaugeField HUmu(_Umu._grid);
 | 
					  GaugeField HUmu(_Umu._grid);
 | 
				
			||||||
    HUmu = _Umu*(-0.5);
 | 
					  HUmu = _Umu*(-0.5);
 | 
				
			||||||
  Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
 | 
					  Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
 | 
				
			||||||
  pickCheckerboard(Even,UmuEven,Umu);
 | 
					  pickCheckerboard(Even,UmuEven,Umu);
 | 
				
			||||||
  pickCheckerboard(Odd ,UmuOdd,Umu);
 | 
					  pickCheckerboard(Odd ,UmuOdd,Umu);
 | 
				
			||||||
@@ -294,11 +356,7 @@ void WilsonFermion5D<Impl>::DhopInternalCommsThenCompute(StencilImpl & st, Lebes
 | 
				
			|||||||
  Compressor compressor(dag);
 | 
					  Compressor compressor(dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Assume balanced KMP_AFFINITY; this is forced in GridThread.h
 | 
					  // Assume balanced KMP_AFFINITY; this is forced in GridThread.h
 | 
				
			||||||
 | 
					  int LLs = in._grid->_rdimensions[0];
 | 
				
			||||||
  int threads = GridThread::GetThreads();
 | 
					 | 
				
			||||||
  int HT      = GridThread::GetHyperThreads();
 | 
					 | 
				
			||||||
  int cores   = GridThread::GetCores();
 | 
					 | 
				
			||||||
  int nwork = U._grid->oSites();
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  commtime -=usecond();
 | 
					  commtime -=usecond();
 | 
				
			||||||
  auto handle = st.HaloExchangeBegin(in,compressor);
 | 
					  auto handle = st.HaloExchangeBegin(in,compressor);
 | 
				
			||||||
@@ -318,97 +376,48 @@ void WilsonFermion5D<Impl>::DhopInternalCommsThenCompute(StencilImpl & st, Lebes
 | 
				
			|||||||
    if( this->HandOptDslash ) {
 | 
					    if( this->HandOptDslash ) {
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
	int sU=ss;
 | 
						for(int s=0;s<LLs;s++){
 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
						  int sU=ss;
 | 
				
			||||||
	  int sF = s+Ls*sU;
 | 
						  int sF = s+LLs*sU;
 | 
				
			||||||
	  Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
						  Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
				
			||||||
	  }
 | 
						  }
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    } else { 
 | 
					    } else { 
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
	{
 | 
						for(int s=0;s<LLs;s++){
 | 
				
			||||||
	  int sd;
 | 
						  int sU=ss;
 | 
				
			||||||
	  for(sd=0;sd<Ls;sd++){
 | 
						  int sF = s+LLs*sU;
 | 
				
			||||||
	    int sU=ss;
 | 
						  Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
				
			||||||
	    int sF = sd+Ls*sU;
 | 
					 | 
				
			||||||
	    Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    if( this->AsmOptDslash ) {
 | 
					    if( this->AsmOptDslash ) {
 | 
				
			||||||
      //      for(int i=0;i<1;i++){
 | 
					 | 
				
			||||||
      //      for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
 | 
					 | 
				
			||||||
      //	PerformanceCounter Counter(i);
 | 
					 | 
				
			||||||
      //	Counter.Start();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp parallel for 
 | 
					 | 
				
			||||||
      for(int t=0;t<threads;t++){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	int hyperthread = t%HT;
 | 
					 | 
				
			||||||
	int core        = t/HT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        int sswork, swork,soff,ssoff,  sU,sF;
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	GridThread::GetWork(nwork,core,sswork,ssoff,cores);
 | 
					 | 
				
			||||||
	GridThread::GetWork(Ls   , hyperthread, swork, soff,HT);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for(int ss=0;ss<sswork;ss++){
 | 
					 | 
				
			||||||
	  for(int s=soff;s<soff+swork;s++){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    sU=ss+ ssoff;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    if ( LebesgueOrder::UseLebesgueOrder ) {
 | 
					 | 
				
			||||||
	      sU = lo.Reorder(sU);
 | 
					 | 
				
			||||||
	    }
 | 
					 | 
				
			||||||
	    sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	    Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
      //      Counter.Stop();
 | 
					 | 
				
			||||||
      //      Counter.Report();
 | 
					 | 
				
			||||||
      //      }
 | 
					 | 
				
			||||||
    } else if( this->HandOptDslash ) {
 | 
					 | 
				
			||||||
      /*
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp parallel for schedule(static)
 | 
					 | 
				
			||||||
      for(int t=0;t<threads;t++){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	int hyperthread = t%HT;
 | 
					 | 
				
			||||||
	int core        = t/HT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        int sswork, swork,soff,ssoff,  sU,sF;
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	GridThread::GetWork(nwork,core,sswork,ssoff,cores);
 | 
					 | 
				
			||||||
	GridThread::GetWork(Ls   , hyperthread, swork, soff,HT);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for(int ss=0;ss<sswork;ss++){
 | 
					 | 
				
			||||||
	  sU=ss+ ssoff;
 | 
					 | 
				
			||||||
	  for(int s=soff;s<soff+swork;s++){
 | 
					 | 
				
			||||||
	    sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	    Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
      */
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
	int sU=ss;
 | 
						for(int s=0;s<LLs;s++){
 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
						  int sU=ss;
 | 
				
			||||||
	  int sF = s+Ls*sU;
 | 
						  int sF = s+LLs*sU;
 | 
				
			||||||
 | 
						  Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else if( this->HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP     
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						for(int s=0;s<LLs;s++){
 | 
				
			||||||
 | 
						  int sU=ss;
 | 
				
			||||||
 | 
						  int sF = s+LLs*sU;
 | 
				
			||||||
	  Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
						  Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    } else { 
 | 
					    } else { 
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
	int sU=ss;
 | 
						for(int s=0;s<LLs;s++){
 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
						  int sU=ss;
 | 
				
			||||||
	  int sF = s+Ls*sU; 
 | 
						  int sF = s+LLs*sU; 
 | 
				
			||||||
	  Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
						  Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
@@ -418,251 +427,6 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
  alltime+=usecond();
 | 
					  alltime+=usecond();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					 | 
				
			||||||
void WilsonFermion5D<Impl>::DhopInternalOMPbench(StencilImpl & st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
						 DoubledGaugeField & U,
 | 
					 | 
				
			||||||
						 const FermionField &in, FermionField &out,int dag)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  //  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
					 | 
				
			||||||
  alltime-=usecond();
 | 
					 | 
				
			||||||
  Compressor compressor(dag);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Assume balanced KMP_AFFINITY; this is forced in GridThread.h
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int threads = GridThread::GetThreads();
 | 
					 | 
				
			||||||
  int HT      = GridThread::GetHyperThreads();
 | 
					 | 
				
			||||||
  int cores   = GridThread::GetCores();
 | 
					 | 
				
			||||||
  int nwork = U._grid->oSites();
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  commtime -=usecond();
 | 
					 | 
				
			||||||
  auto handle = st.HaloExchangeBegin(in,compressor);
 | 
					 | 
				
			||||||
  st.HaloExchangeComplete(handle);
 | 
					 | 
				
			||||||
  commtime +=usecond();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  jointime -=usecond();
 | 
					 | 
				
			||||||
  jointime +=usecond();
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  // Dhop takes the 4d grid from U, and makes a 5d index for fermion
 | 
					 | 
				
			||||||
  // Not loop ordering and data layout.
 | 
					 | 
				
			||||||
  // Designed to create 
 | 
					 | 
				
			||||||
  // - per thread reuse in L1 cache for U
 | 
					 | 
				
			||||||
  // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp parallel 
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
  for(int jjj=0;jjj<100;jjj++){
 | 
					 | 
				
			||||||
#pragma omp barrier
 | 
					 | 
				
			||||||
  dslashtime -=usecond();
 | 
					 | 
				
			||||||
  if ( dag == DaggerYes ) {
 | 
					 | 
				
			||||||
    if( this->HandOptDslash ) {
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	int sU=ss;
 | 
					 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
					 | 
				
			||||||
	  int sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	  Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    } else { 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
	  int sd;
 | 
					 | 
				
			||||||
	  for(sd=0;sd<Ls;sd++){
 | 
					 | 
				
			||||||
	    int sU=ss;
 | 
					 | 
				
			||||||
	    int sF = sd+Ls*sU;
 | 
					 | 
				
			||||||
	    Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  } else {
 | 
					 | 
				
			||||||
    if( this->AsmOptDslash ) {
 | 
					 | 
				
			||||||
      //      for(int i=0;i<1;i++){
 | 
					 | 
				
			||||||
      //      for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
 | 
					 | 
				
			||||||
      //	PerformanceCounter Counter(i);
 | 
					 | 
				
			||||||
      //	Counter.Start();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int t=0;t<threads;t++){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	int hyperthread = t%HT;
 | 
					 | 
				
			||||||
	int core        = t/HT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        int sswork, swork,soff,ssoff,  sU,sF;
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	GridThread::GetWork(nwork,core,sswork,ssoff,cores);
 | 
					 | 
				
			||||||
	GridThread::GetWork(Ls   , hyperthread, swork, soff,HT);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for(int ss=0;ss<sswork;ss++){
 | 
					 | 
				
			||||||
	  for(int s=soff;s<soff+swork;s++){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    sU=ss+ ssoff;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    if ( LebesgueOrder::UseLebesgueOrder ) {
 | 
					 | 
				
			||||||
	      sU = lo.Reorder(sU);
 | 
					 | 
				
			||||||
	    }
 | 
					 | 
				
			||||||
	    sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	    Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
      //      Counter.Stop();
 | 
					 | 
				
			||||||
      //      Counter.Report();
 | 
					 | 
				
			||||||
      //      }
 | 
					 | 
				
			||||||
    } else if( this->HandOptDslash ) {
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	int sU=ss;
 | 
					 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
					 | 
				
			||||||
	  int sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	  Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    } else { 
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	int sU=ss;
 | 
					 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
					 | 
				
			||||||
	  int sF = s+Ls*sU; 
 | 
					 | 
				
			||||||
	  Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  dslashtime +=usecond();
 | 
					 | 
				
			||||||
  alltime+=usecond();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class Impl>
 | 
					 | 
				
			||||||
void WilsonFermion5D<Impl>::DhopInternalL1bench(StencilImpl & st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
						DoubledGaugeField & U,
 | 
					 | 
				
			||||||
						const FermionField &in, FermionField &out,int dag)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  //  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
					 | 
				
			||||||
  alltime-=usecond();
 | 
					 | 
				
			||||||
  Compressor compressor(dag);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Assume balanced KMP_AFFINITY; this is forced in GridThread.h
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int threads = GridThread::GetThreads();
 | 
					 | 
				
			||||||
  int HT      = GridThread::GetHyperThreads();
 | 
					 | 
				
			||||||
  int cores   = GridThread::GetCores();
 | 
					 | 
				
			||||||
  int nwork = U._grid->oSites();
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  commtime -=usecond();
 | 
					 | 
				
			||||||
  auto handle = st.HaloExchangeBegin(in,compressor);
 | 
					 | 
				
			||||||
  st.HaloExchangeComplete(handle);
 | 
					 | 
				
			||||||
  commtime +=usecond();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  jointime -=usecond();
 | 
					 | 
				
			||||||
  jointime +=usecond();
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  // Dhop takes the 4d grid from U, and makes a 5d index for fermion
 | 
					 | 
				
			||||||
  // Not loop ordering and data layout.
 | 
					 | 
				
			||||||
  // Designed to create 
 | 
					 | 
				
			||||||
  // - per thread reuse in L1 cache for U
 | 
					 | 
				
			||||||
  // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp parallel 
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
  for(int jjj=0;jjj<100;jjj++){
 | 
					 | 
				
			||||||
#pragma omp barrier
 | 
					 | 
				
			||||||
  dslashtime -=usecond();
 | 
					 | 
				
			||||||
  if ( dag == DaggerYes ) {
 | 
					 | 
				
			||||||
    if( this->HandOptDslash ) {
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	int sU=0;
 | 
					 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
					 | 
				
			||||||
	  int sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	  Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    } else { 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
	  int sd;
 | 
					 | 
				
			||||||
	  for(sd=0;sd<Ls;sd++){
 | 
					 | 
				
			||||||
	    int sU=0;
 | 
					 | 
				
			||||||
	    int sF = sd+Ls*sU;
 | 
					 | 
				
			||||||
	    Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  } else {
 | 
					 | 
				
			||||||
    if( this->AsmOptDslash ) {
 | 
					 | 
				
			||||||
      //      for(int i=0;i<1;i++){
 | 
					 | 
				
			||||||
      //      for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
 | 
					 | 
				
			||||||
      //	PerformanceCounter Counter(i);
 | 
					 | 
				
			||||||
      //	Counter.Start();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int t=0;t<threads;t++){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	int hyperthread = t%HT;
 | 
					 | 
				
			||||||
	int core        = t/HT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        int sswork, swork,soff,ssoff,  sU,sF;
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	GridThread::GetWork(nwork,core,sswork,ssoff,cores);
 | 
					 | 
				
			||||||
	GridThread::GetWork(Ls   , hyperthread, swork, soff,HT);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for(int ss=0;ss<sswork;ss++){
 | 
					 | 
				
			||||||
	  for(int s=soff;s<soff+swork;s++){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    sU=0;
 | 
					 | 
				
			||||||
	    sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	    Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
      //      Counter.Stop();
 | 
					 | 
				
			||||||
      //      Counter.Report();
 | 
					 | 
				
			||||||
      //      }
 | 
					 | 
				
			||||||
    } else if( this->HandOptDslash ) {
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	int sU=0;
 | 
					 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
					 | 
				
			||||||
	  int sF = s+Ls*sU;
 | 
					 | 
				
			||||||
	  Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    } else { 
 | 
					 | 
				
			||||||
#pragma omp for
 | 
					 | 
				
			||||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
					 | 
				
			||||||
	int sU=0;
 | 
					 | 
				
			||||||
	for(int s=0;s<Ls;s++){
 | 
					 | 
				
			||||||
	  int sF = s+Ls*sU; 
 | 
					 | 
				
			||||||
	  Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  dslashtime +=usecond();
 | 
					 | 
				
			||||||
  alltime+=usecond();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class Impl>
 | 
					 | 
				
			||||||
void WilsonFermion5D<Impl>::DhopInternalCommsOverlapCompute(StencilImpl & st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
						     DoubledGaugeField & U,
 | 
					 | 
				
			||||||
						     const FermionField &in, FermionField &out,int dag)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  assert(0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
 | 
					void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
@@ -706,6 +470,8 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
FermOpTemplateInstantiate(WilsonFermion5D);
 | 
					FermOpTemplateInstantiate(WilsonFermion5D);
 | 
				
			||||||
GparityFermOpTemplateInstantiate(WilsonFermion5D);
 | 
					GparityFermOpTemplateInstantiate(WilsonFermion5D);
 | 
				
			||||||
 | 
					template class WilsonFermion5D<DomainWallRedBlack5dImplF>;		
 | 
				
			||||||
 | 
					template class WilsonFermion5D<DomainWallRedBlack5dImplD>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}}
 | 
					}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -87,6 +87,7 @@ namespace Grid {
 | 
				
			|||||||
      virtual void   MeooeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
					      virtual void   MeooeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
				
			||||||
      virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
					      virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
				
			||||||
      virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
 | 
					      virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
 | 
				
			||||||
 | 
					      virtual void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // These can be overridden by fancy 5d chiral action
 | 
					      // These can be overridden by fancy 5d chiral action
 | 
				
			||||||
      virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
					      virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
				
			||||||
@@ -121,32 +122,12 @@ namespace Grid {
 | 
				
			|||||||
			FermionField &out,
 | 
								FermionField &out,
 | 
				
			||||||
			int dag);
 | 
								int dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      void DhopInternalOMPbench(StencilImpl & st,
 | 
					 | 
				
			||||||
				LebesgueOrder &lo,
 | 
					 | 
				
			||||||
				DoubledGaugeField &U,
 | 
					 | 
				
			||||||
				const FermionField &in, 
 | 
					 | 
				
			||||||
				FermionField &out,
 | 
					 | 
				
			||||||
				int dag);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      void DhopInternalL1bench(StencilImpl & st,
 | 
					 | 
				
			||||||
				LebesgueOrder &lo,
 | 
					 | 
				
			||||||
				DoubledGaugeField &U,
 | 
					 | 
				
			||||||
				const FermionField &in, 
 | 
					 | 
				
			||||||
				FermionField &out,
 | 
					 | 
				
			||||||
				int dag);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      void DhopInternalCommsThenCompute(StencilImpl & st,
 | 
					      void DhopInternalCommsThenCompute(StencilImpl & st,
 | 
				
			||||||
			LebesgueOrder &lo,
 | 
								LebesgueOrder &lo,
 | 
				
			||||||
			DoubledGaugeField &U,
 | 
								DoubledGaugeField &U,
 | 
				
			||||||
			const FermionField &in, 
 | 
								const FermionField &in, 
 | 
				
			||||||
			FermionField &out,
 | 
								FermionField &out,
 | 
				
			||||||
			int dag);
 | 
								int dag);
 | 
				
			||||||
      void DhopInternalCommsOverlapCompute(StencilImpl & st,
 | 
					 | 
				
			||||||
			LebesgueOrder &lo,
 | 
					 | 
				
			||||||
			DoubledGaugeField &U,
 | 
					 | 
				
			||||||
			const FermionField &in, 
 | 
					 | 
				
			||||||
			FermionField &out,
 | 
					 | 
				
			||||||
			int dag);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Constructors
 | 
					      // Constructors
 | 
				
			||||||
      WilsonFermion5D(GaugeField &_Umu,
 | 
					      WilsonFermion5D(GaugeField &_Umu,
 | 
				
			||||||
@@ -156,6 +137,15 @@ namespace Grid {
 | 
				
			|||||||
		      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
							      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
				
			||||||
		      double _M5,const ImplParams &p= ImplParams());
 | 
							      double _M5,const ImplParams &p= ImplParams());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // Constructors
 | 
				
			||||||
 | 
					      WilsonFermion5D(int simd, 
 | 
				
			||||||
 | 
							      GaugeField &_Umu,
 | 
				
			||||||
 | 
							      GridCartesian         &FiveDimGrid,
 | 
				
			||||||
 | 
							      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
				
			||||||
 | 
							      GridCartesian         &FourDimGrid,
 | 
				
			||||||
 | 
							      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
				
			||||||
 | 
							      double _M5,const ImplParams &p= ImplParams());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // DoubleStore
 | 
					      // DoubleStore
 | 
				
			||||||
      void ImportGauge(const GaugeField &_Umu);
 | 
					      void ImportGauge(const GaugeField &_Umu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -529,5 +529,7 @@ void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FermOpTemplateInstantiate(WilsonKernels);
 | 
					  FermOpTemplateInstantiate(WilsonKernels);
 | 
				
			||||||
 | 
					template class WilsonKernels<DomainWallRedBlack5dImplF>;		
 | 
				
			||||||
 | 
					template class WilsonKernels<DomainWallRedBlack5dImplD>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}}
 | 
					}}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -256,5 +256,7 @@ void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField
 | 
				
			|||||||
  template class WilsonKernels<WilsonImplD>; 
 | 
					  template class WilsonKernels<WilsonImplD>; 
 | 
				
			||||||
  template class WilsonKernels<GparityWilsonImplF>;
 | 
					  template class WilsonKernels<GparityWilsonImplF>;
 | 
				
			||||||
  template class WilsonKernels<GparityWilsonImplD>;
 | 
					  template class WilsonKernels<GparityWilsonImplD>;
 | 
				
			||||||
 | 
					  template class WilsonKernels<DomainWallRedBlack5dImplF>;
 | 
				
			||||||
 | 
					  template class WilsonKernels<DomainWallRedBlack5dImplD>;
 | 
				
			||||||
}}
 | 
					}}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -54,14 +54,15 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
    Chi_11 = ref()(1)(1);\
 | 
					    Chi_11 = ref()(1)(1);\
 | 
				
			||||||
    Chi_12 = ref()(1)(2);
 | 
					    Chi_12 = ref()(1)(2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// To splat or not to splat depends on the implementation
 | 
				
			||||||
#define MULT_2SPIN(A)\
 | 
					#define MULT_2SPIN(A)\
 | 
				
			||||||
   auto & ref(U._odata[sU](A));	\
 | 
					   auto & ref(U._odata[sU](A));	\
 | 
				
			||||||
    U_00 = ref()(0,0);\
 | 
					   Impl::loadLinkElement(U_00,ref()(0,0));	\
 | 
				
			||||||
    U_10 = ref()(1,0);\
 | 
					   Impl::loadLinkElement(U_10,ref()(1,0));	\
 | 
				
			||||||
    U_20 = ref()(2,0);\
 | 
					   Impl::loadLinkElement(U_20,ref()(2,0));	\
 | 
				
			||||||
    U_01 = ref()(0,1);\
 | 
					   Impl::loadLinkElement(U_01,ref()(0,1));	\
 | 
				
			||||||
    U_11 = ref()(1,1);				\
 | 
					   Impl::loadLinkElement(U_11,ref()(1,1));	\
 | 
				
			||||||
    U_21 = ref()(2,1);\
 | 
					   Impl::loadLinkElement(U_21,ref()(2,1));	\
 | 
				
			||||||
    UChi_00 = U_00*Chi_00;\
 | 
					    UChi_00 = U_00*Chi_00;\
 | 
				
			||||||
    UChi_10 = U_00*Chi_10;\
 | 
					    UChi_10 = U_00*Chi_10;\
 | 
				
			||||||
    UChi_01 = U_10*Chi_00;\
 | 
					    UChi_01 = U_10*Chi_00;\
 | 
				
			||||||
@@ -74,9 +75,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
    UChi_11+= U_11*Chi_11;\
 | 
					    UChi_11+= U_11*Chi_11;\
 | 
				
			||||||
    UChi_02+= U_21*Chi_01;\
 | 
					    UChi_02+= U_21*Chi_01;\
 | 
				
			||||||
    UChi_12+= U_21*Chi_11;\
 | 
					    UChi_12+= U_21*Chi_11;\
 | 
				
			||||||
    U_00 = ref()(0,2);\
 | 
					    Impl::loadLinkElement(U_00,ref()(0,2));	\
 | 
				
			||||||
    U_10 = ref()(1,2);\
 | 
					    Impl::loadLinkElement(U_10,ref()(1,2));	\
 | 
				
			||||||
    U_20 = ref()(2,2);\
 | 
					    Impl::loadLinkElement(U_20,ref()(2,2));	\
 | 
				
			||||||
    UChi_00+= U_00*Chi_02;\
 | 
					    UChi_00+= U_00*Chi_02;\
 | 
				
			||||||
    UChi_10+= U_00*Chi_12;\
 | 
					    UChi_10+= U_00*Chi_12;\
 | 
				
			||||||
    UChi_01+= U_10*Chi_02;\
 | 
					    UChi_01+= U_10*Chi_02;\
 | 
				
			||||||
@@ -84,6 +85,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
    UChi_02+= U_20*Chi_02;\
 | 
					    UChi_02+= U_20*Chi_02;\
 | 
				
			||||||
    UChi_12+= U_20*Chi_12;
 | 
					    UChi_12+= U_20*Chi_12;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define PERMUTE_DIR(dir)			\
 | 
					#define PERMUTE_DIR(dir)			\
 | 
				
			||||||
      permute##dir(Chi_00,Chi_00);\
 | 
					      permute##dir(Chi_00,Chi_00);\
 | 
				
			||||||
      permute##dir(Chi_01,Chi_01);\
 | 
					      permute##dir(Chi_01,Chi_01);\
 | 
				
			||||||
@@ -809,7 +811,6 @@ int WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,Doub
 | 
				
			|||||||
							     int sF,int sU,const FermionField &in, FermionField &out)
 | 
												     int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // returns void, will template override for Wilson Nc=3
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // returns void, will template override for Wilson Nc=3
 | 
				
			||||||
  //check consistency of return types between these functions and the ones in WilsonKernels.cc
 | 
					 | 
				
			||||||
  return 0;
 | 
					  return 0;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -843,6 +844,47 @@ int WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,D
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //////////////
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					template<>
 | 
				
			||||||
 | 
					int WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
												     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
												     int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // returns void, will template override for Wilson Nc=3
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<>
 | 
				
			||||||
 | 
					int WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
													std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
													int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<>
 | 
				
			||||||
 | 
					int WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
												     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
												     int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<>
 | 
				
			||||||
 | 
					int WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
													std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
													int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template int WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					template int WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							       int ss,int sU,const FermionField &in, FermionField &out);
 | 
												       int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
@@ -870,4 +912,21 @@ template int WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilI
 | 
				
			|||||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
														 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
														 int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template int WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
													      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
													      int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					template int WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
													      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
													      int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					template int WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
														 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
														 int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					template int WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
 | 
														 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
 | 
														 int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}}
 | 
					}}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user