mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Added overlap comms compute mode
This commit is contained in:
		@@ -85,11 +85,12 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  Complex cone(1.0,0.0);
 | 
					  Complex cone(1.0,0.0);
 | 
				
			||||||
  for(int nn=0;nn<Nd;nn++){
 | 
					  for(int nn=0;nn<Nd;nn++){
 | 
				
			||||||
    random(pRNG,U[nn]);
 | 
					    random(pRNG,U[nn]);
 | 
				
			||||||
    if(0) {
 | 
					    if(1) {
 | 
				
			||||||
      if (nn==-1) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
 | 
					      if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
 | 
				
			||||||
      else       { U[nn] = cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
 | 
					      //      else       { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
 | 
				
			||||||
 | 
					      else       { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    pokeIndex<LorentzIndex>(Umu,U[nn],nn);
 | 
					    PokeIndex<LorentzIndex>(Umu,U[nn],nn);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										22
									
								
								lib/Init.cc
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								lib/Init.cc
									
									
									
									
									
								
							@@ -238,15 +238,23 @@ void Grid_init(int *argc,char ***argv)
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout <<std::endl;
 | 
					  std::cout <<std::endl;
 | 
				
			||||||
  std::cout <<Logger::GREEN<< "         GGG  "<<Logger::RED<<"  RRRR  "<<Logger::BLUE<<"  IIIII "<<Logger::PURPLE<<"  DDDD    "<<std::endl;
 | 
					  std::cout <<Logger::RED  << "__|__|__|__|__"<<             "|__|__|_"<<              "_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
				
			||||||
  std::cout <<Logger::GREEN<< "        G   G "<<Logger::RED<<"  R   R "<<Logger::BLUE<<"    I   "<<Logger::PURPLE<<"  D   D   "<<std::endl;
 | 
					  std::cout <<Logger::RED  << "__|__|__|__|__"<<             "|__|__|_"<<              "_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
				
			||||||
  std::cout <<Logger::GREEN<< "        G     "<<Logger::RED<<"  R   R "<<Logger::BLUE<<"    I   "<<Logger::PURPLE<<"  D    D  "<<std::endl;
 | 
					  std::cout <<Logger::RED  << "__|__|__|  |  "<<             "|  |  | "<<              " |  |  |"<<                "  |  |  | _|__"<<std::endl; 
 | 
				
			||||||
  std::cout <<Logger::GREEN<< "        G  GGG"<<Logger::RED<<"  RRRR  "<<Logger::BLUE<<"    I   "<<Logger::PURPLE<<"  D    D  "<<std::endl;
 | 
					  std::cout <<Logger::RED  << "__|__         "<<             "        "<<              "        "<<                "          _|__"<<std::endl; 
 | 
				
			||||||
  std::cout <<Logger::GREEN<< "        G   G "<<Logger::RED<<"  R  R  "<<Logger::BLUE<<"    I   "<<Logger::PURPLE<<"  D   D   "<<std::endl;
 | 
					  std::cout <<Logger::GREEN<< "__|_   GGGG   "<<Logger::RED<<" RRRR   "<<Logger::BLUE<<" III    "<<Logger::PURPLE<<"DDDD      _|__"<<std::endl;
 | 
				
			||||||
  std::cout <<Logger::GREEN<< "         GGGG "<<Logger::RED<<"  R   R "<<Logger::BLUE<<"  IIIII "<<Logger::PURPLE<<"  DDDD    "<<std::endl;
 | 
					  std::cout <<Logger::GREEN<< "__|_  G       "<<Logger::RED<<" R   R  "<<Logger::BLUE<<"  I     "<<Logger::PURPLE<<"D   D     _|__"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout <<Logger::GREEN<< "__|_  G       "<<Logger::RED<<" R   R  "<<Logger::BLUE<<"  I     "<<Logger::PURPLE<<"D    D    _|__"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout <<Logger::GREEN<< "__|_  G  GG   "<<Logger::RED<<" RRRR   "<<Logger::BLUE<<"  I     "<<Logger::PURPLE<<"D    D    _|__"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout <<Logger::GREEN<< "__|_  G   G   "<<Logger::RED<<" R  R   "<<Logger::BLUE<<"  I     "<<Logger::PURPLE<<"D   D     _|__"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout <<Logger::GREEN<< "__|_   GGGG   "<<Logger::RED<<" R   R  "<<Logger::BLUE<<" III    "<<Logger::PURPLE<<"DDDD      _|__"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout <<Logger::BLUE << "__|__         "<<             "        "<<              "        "<<                "          _|__"<<std::endl; 
 | 
				
			||||||
 | 
					  std::cout <<Logger::BLUE << "__|__|__|__|__"<<             "|__|____"<<              "_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
				
			||||||
 | 
					  std::cout <<Logger::BLUE << "__|__|__|__|__"<<             "|__|__|_"<<              "_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
				
			||||||
 | 
					  std::cout <<Logger::BLUE << "  |  |  |  |  "<<             "|  |  | "<<              " |  |  |"<<                "  |  |  |  |  "<<std::endl; 
 | 
				
			||||||
  std::cout << std::endl;
 | 
					  std::cout << std::endl;
 | 
				
			||||||
  std::cout << std::endl;
 | 
					  std::cout << std::endl;
 | 
				
			||||||
  std::cout <<Logger::RED<< std::endl;
 | 
					  std::cout <<Logger::YELLOW<< std::endl;
 | 
				
			||||||
  std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
 | 
					  std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
 | 
				
			||||||
  std::cout << "Colours by Tadahito Boyle "<<std::endl;
 | 
					  std::cout << "Colours by Tadahito Boyle "<<std::endl;
 | 
				
			||||||
  std::cout << std::endl;
 | 
					  std::cout << std::endl;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,6 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#ifndef GRID_STENCIL_H
 | 
					#ifndef GRID_STENCIL_H
 | 
				
			||||||
#define GRID_STENCIL_H
 | 
					#define GRID_STENCIL_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <thread>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <stencil/Lebesgue.h>   // subdir aggregate
 | 
					#include <stencil/Lebesgue.h>   // subdir aggregate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -90,6 +92,7 @@ namespace Grid {
 | 
				
			|||||||
      int                               _npoints; // Move to template param?
 | 
					      int                               _npoints; // Move to template param?
 | 
				
			||||||
      GridBase *                        _grid;
 | 
					      GridBase *                        _grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // npoints of these
 | 
					      // npoints of these
 | 
				
			||||||
      std::vector<int>                  _directions;
 | 
					      std::vector<int>                  _directions;
 | 
				
			||||||
      std::vector<int>                  _distances;
 | 
					      std::vector<int>                  _distances;
 | 
				
			||||||
@@ -455,6 +458,18 @@ namespace Grid {
 | 
				
			|||||||
      // Could allow a functional munging of the halo to another type during the comms.
 | 
					      // Could allow a functional munging of the halo to another type during the comms.
 | 
				
			||||||
      // this could implement the 16bit/32bit/64bit compression.
 | 
					      // this could implement the 16bit/32bit/64bit compression.
 | 
				
			||||||
      void HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress) 
 | 
					      void HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress) 
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
						std::thread thr = HaloExchangeBegin(source,u_comm_buf,compress);
 | 
				
			||||||
 | 
						thr.join();
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::thread HaloExchangeBegin(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > & u_comm_buf,compressor &compress) {
 | 
				
			||||||
 | 
						return std::thread([&] { this->HaloExchangeBlocking(source,u_comm_buf,compress); });
 | 
				
			||||||
 | 
						//	std::thread t(&HaloExchangeBlocking,this,source,u_comm_buf,compress);
 | 
				
			||||||
 | 
						//	return t;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      void HaloExchangeBlocking(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
 | 
				
			||||||
      {
 | 
					      {
 | 
				
			||||||
	// conformable(source._grid,_grid);
 | 
						// conformable(source._grid,_grid);
 | 
				
			||||||
	assert(source._grid==_grid);
 | 
						assert(source._grid==_grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -110,10 +110,10 @@ namespace Grid {
 | 
				
			|||||||
    // Single flavour four spinors with colour index
 | 
					    // Single flavour four spinors with colour index
 | 
				
			||||||
    ///////
 | 
					    ///////
 | 
				
			||||||
    template<class S,int Nrepresentation=Nc>
 | 
					    template<class S,int Nrepresentation=Nc>
 | 
				
			||||||
    class WilsonImpl :  public PeriodicGaugeImpl<S,Nrepresentation> { 
 | 
					    class WilsonImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { 
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      typedef PeriodicGaugeImpl<S,Nrepresentation> Gimpl;
 | 
					      typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
					      INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -182,10 +182,10 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    ////////////////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class S,int Nrepresentation>
 | 
					    template<class S,int Nrepresentation>
 | 
				
			||||||
    class GparityWilsonImpl : public ConjugateGaugeImpl<S,Nrepresentation> { 
 | 
					    class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{ 
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      typedef ConjugateGaugeImpl<S,Nrepresentation> Gimpl;
 | 
					      typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
					      INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -227,7 +227,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    out.checkerboard = in.checkerboard;
 | 
					    out.checkerboard = in.checkerboard;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    DhopInternal(Stencil,Umu,in,out,dag);
 | 
					    DhopInternalCommsCompute(Stencil,Umu,in,out,dag);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  template<class Impl>
 | 
					  template<class Impl>
 | 
				
			||||||
@@ -238,7 +238,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    assert(in.checkerboard==Even);
 | 
					    assert(in.checkerboard==Even);
 | 
				
			||||||
    out.checkerboard = Odd;
 | 
					    out.checkerboard = Odd;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    DhopInternal(StencilEven,UmuOdd,in,out,dag);
 | 
					    DhopInternalCommsCompute(StencilEven,UmuOdd,in,out,dag);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  template<class Impl>
 | 
					  template<class Impl>
 | 
				
			||||||
@@ -249,7 +249,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    assert(in.checkerboard==Odd);
 | 
					    assert(in.checkerboard==Odd);
 | 
				
			||||||
    out.checkerboard = Even;
 | 
					    out.checkerboard = Even;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    DhopInternal(StencilOdd,UmuEven,in,out,dag);
 | 
					    DhopInternalCommsCompute(StencilOdd,UmuEven,in,out,dag);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  template<class Impl>
 | 
					  template<class Impl>
 | 
				
			||||||
@@ -320,6 +320,78 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  template<class Impl>
 | 
				
			||||||
 | 
					  void WilsonFermion<Impl>::DhopInternalCommsCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
				
			||||||
 | 
											     const FermionField &in, FermionField &out,int dag) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Compressor compressor(dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::thread comms_thread = st.HaloExchangeBegin(in,comm_buf,compressor);
 | 
				
			||||||
 | 
					    comms_thread.join();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool local    = true;
 | 
				
			||||||
 | 
					    bool nonlocal = false;
 | 
				
			||||||
 | 
					    if ( dag == DaggerYes ) {
 | 
				
			||||||
 | 
					      if( HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptDhopSiteDag(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      if( HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptDhopSite(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    local    = false;
 | 
				
			||||||
 | 
					    nonlocal = true;
 | 
				
			||||||
 | 
					    if ( dag == DaggerYes ) {
 | 
				
			||||||
 | 
					      if( HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptDhopSiteDag(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      if( HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					        for(int sss=0;sss<in._grid->oSites();sss++){
 | 
				
			||||||
 | 
						  Kernels::DiracOptDhopSite(st,U,comm_buf,sss,sss,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
  FermOpTemplateInstantiate(WilsonFermion);
 | 
					  FermOpTemplateInstantiate(WilsonFermion);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -114,6 +114,9 @@ namespace Grid {
 | 
				
			|||||||
      void DhopInternal(StencilImpl & st,DoubledGaugeField & U,
 | 
					      void DhopInternal(StencilImpl & st,DoubledGaugeField & U,
 | 
				
			||||||
			const FermionField &in, FermionField &out,int dag) ;
 | 
								const FermionField &in, FermionField &out,int dag) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      void DhopInternalCommsCompute(StencilImpl & st,DoubledGaugeField & U,
 | 
				
			||||||
 | 
									    const FermionField &in, FermionField &out,int dag) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Constructor
 | 
					      // Constructor
 | 
				
			||||||
      WilsonFermion(GaugeField &_Umu,
 | 
					      WilsonFermion(GaugeField &_Umu,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -396,6 +396,137 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
  dslashtime +=usecond();
 | 
					  dslashtime +=usecond();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void WilsonFermion5D<Impl>::DhopInternalCommsCompute(StencilImpl & st, LebesgueOrder &lo,
 | 
				
			||||||
 | 
											     DoubledGaugeField & U,
 | 
				
			||||||
 | 
											     const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  //  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Compressor compressor(dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Assume balanced KMP_AFFINITY; this is forced in GridThread.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					  int HT      = GridThread::GetHyperThreads();
 | 
				
			||||||
 | 
					  int cores   = GridThread::GetCores();
 | 
				
			||||||
 | 
					  int nwork = U._grid->oSites();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  commtime -=usecond();
 | 
				
			||||||
 | 
					  std::thread thr = st.HaloExchangeBegin(in,comm_buf,compressor);
 | 
				
			||||||
 | 
					  commtime +=usecond();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // Dhop takes the 4d grid from U, and makes a 5d index for fermion
 | 
				
			||||||
 | 
					  // Not loop ordering and data layout.
 | 
				
			||||||
 | 
					  // Designed to create 
 | 
				
			||||||
 | 
					  // - per thread reuse in L1 cache for U
 | 
				
			||||||
 | 
					  // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
 | 
				
			||||||
 | 
					  bool local    = true;
 | 
				
			||||||
 | 
					  bool nonlocal = false;
 | 
				
			||||||
 | 
					  dslashtime -=usecond();
 | 
				
			||||||
 | 
					  if ( dag == DaggerYes ) {
 | 
				
			||||||
 | 
					    if( this->HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						int sU=ss;
 | 
				
			||||||
 | 
						for(int s=0;s<Ls;s++){
 | 
				
			||||||
 | 
						  int sF = s+Ls*sU;
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  int sd;
 | 
				
			||||||
 | 
						  for(sd=0;sd<Ls;sd++){
 | 
				
			||||||
 | 
						    int sU=ss;
 | 
				
			||||||
 | 
						    int sF = sd+Ls*sU;
 | 
				
			||||||
 | 
						    Kernels::DiracOptDhopSiteDag(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    if( this->HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						int sU=ss;
 | 
				
			||||||
 | 
						for(int s=0;s<Ls;s++){
 | 
				
			||||||
 | 
						  int sF = s+Ls*sU;
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						int sU=ss;
 | 
				
			||||||
 | 
						for(int s=0;s<Ls;s++){
 | 
				
			||||||
 | 
						  int sF = s+Ls*sU; 
 | 
				
			||||||
 | 
						  Kernels::DiracOptDhopSite(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  dslashtime +=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  commtime -=usecond();
 | 
				
			||||||
 | 
					  thr.join();
 | 
				
			||||||
 | 
					  commtime +=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  local    = false;
 | 
				
			||||||
 | 
					  nonlocal = true;
 | 
				
			||||||
 | 
					  dslashtime -=usecond();
 | 
				
			||||||
 | 
					  if ( dag == DaggerYes ) {
 | 
				
			||||||
 | 
					    if( this->HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						int sU=ss;
 | 
				
			||||||
 | 
						for(int s=0;s<Ls;s++){
 | 
				
			||||||
 | 
						  int sF = s+Ls*sU;
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  int sd;
 | 
				
			||||||
 | 
						  for(sd=0;sd<Ls;sd++){
 | 
				
			||||||
 | 
						    int sU=ss;
 | 
				
			||||||
 | 
						    int sF = sd+Ls*sU;
 | 
				
			||||||
 | 
						    Kernels::DiracOptDhopSiteDag(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    if( this->HandOptDslash ) {
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						int sU=ss;
 | 
				
			||||||
 | 
						for(int s=0;s<Ls;s++){
 | 
				
			||||||
 | 
						  int sF = s+Ls*sU;
 | 
				
			||||||
 | 
						  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } else { 
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
 | 
					      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
				
			||||||
 | 
						int sU=ss;
 | 
				
			||||||
 | 
						for(int s=0;s<Ls;s++){
 | 
				
			||||||
 | 
						  int sF = s+Ls*sU; 
 | 
				
			||||||
 | 
						  Kernels::DiracOptDhopSite(st,U,comm_buf,sF,sU,in,out,local,nonlocal);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  dslashtime +=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
 | 
					void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -405,7 +536,7 @@ void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int
 | 
				
			|||||||
  assert(in.checkerboard==Even);
 | 
					  assert(in.checkerboard==Even);
 | 
				
			||||||
  out.checkerboard = Odd;
 | 
					  out.checkerboard = Odd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
 | 
					  DhopInternalCommsCompute(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 | 
					void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
@@ -416,7 +547,7 @@ void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int
 | 
				
			|||||||
  assert(in.checkerboard==Odd);
 | 
					  assert(in.checkerboard==Odd);
 | 
				
			||||||
  out.checkerboard = Even;
 | 
					  out.checkerboard = Even;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
 | 
					  DhopInternalCommsCompute(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
 | 
					void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
@@ -426,7 +557,7 @@ void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int d
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  out.checkerboard = in.checkerboard;
 | 
					  out.checkerboard = in.checkerboard;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
 | 
					  DhopInternalCommsCompute(Stencil,Lebesgue,Umu,in,out,dag);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
 | 
					void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -117,6 +117,13 @@ namespace Grid {
 | 
				
			|||||||
			FermionField &out,
 | 
								FermionField &out,
 | 
				
			||||||
			int dag);
 | 
								int dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      void DhopInternalCommsCompute(StencilImpl & st,
 | 
				
			||||||
 | 
								LebesgueOrder &lo,
 | 
				
			||||||
 | 
								DoubledGaugeField &U,
 | 
				
			||||||
 | 
								const FermionField &in, 
 | 
				
			||||||
 | 
								FermionField &out,
 | 
				
			||||||
 | 
								int dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Constructors
 | 
					      // Constructors
 | 
				
			||||||
      WilsonFermion5D(GaugeField &_Umu,
 | 
					      WilsonFermion5D(GaugeField &_Umu,
 | 
				
			||||||
		      GridCartesian         &FiveDimGrid,
 | 
							      GridCartesian         &FiveDimGrid,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,10 +34,11 @@ namespace QCD {
 | 
				
			|||||||
template<class Impl> 
 | 
					template<class Impl> 
 | 
				
			||||||
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
 | 
					WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Need controls to do interior, exterior, or both
 | 
				
			||||||
template<class Impl> 
 | 
					template<class Impl> 
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
					   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					   int sF,int sU,const FermionField &in, FermionField &out)
 | 
										   int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  SiteHalfSpinor  tmp;    
 | 
					  SiteHalfSpinor  tmp;    
 | 
				
			||||||
  SiteHalfSpinor  chi;    
 | 
					  SiteHalfSpinor  chi;    
 | 
				
			||||||
@@ -46,232 +47,426 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField
 | 
				
			|||||||
  StencilEntry *SE;
 | 
					  StencilEntry *SE;
 | 
				
			||||||
  int ptype;
 | 
					  int ptype;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int num = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  result=zero;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Xp
 | 
					  // Xp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Xp,sF);
 | 
					  SE=st.GetEntry(ptype,Xp,sF);
 | 
				
			||||||
  if ( SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) { 
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjXp(tmp,in._odata[SE->_offset]);
 | 
					      spProjXp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjXp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjXp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) { 
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
 | 
					 | 
				
			||||||
  spReconXp(result,Uchi);
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
 | 
				
			||||||
 | 
					    accumReconXp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Yp
 | 
					  // Yp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Yp,sF);
 | 
					  SE=st.GetEntry(ptype,Yp,sF);
 | 
				
			||||||
  if ( SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) { 
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjYp(tmp,in._odata[SE->_offset]);
 | 
					      spProjYp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjYp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjYp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) { 
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
 | 
				
			||||||
    accumReconYp(result,Uchi);
 | 
					    accumReconYp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Zp
 | 
					  // Zp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Zp,sF);
 | 
					  SE=st.GetEntry(ptype,Zp,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) { 
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjZp(tmp,in._odata[SE->_offset]);
 | 
					      spProjZp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjZp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjZp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) { 
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
 | 
				
			||||||
    accumReconZp(result,Uchi);
 | 
					    accumReconZp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Tp
 | 
					  // Tp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Tp,sF);
 | 
					  SE=st.GetEntry(ptype,Tp,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjTp(tmp,in._odata[SE->_offset]);
 | 
					      spProjTp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjTp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjTp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
 | 
				
			||||||
    accumReconTp(result,Uchi);
 | 
					    accumReconTp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Xm
 | 
					  // Xm
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Xm,sF);
 | 
					  SE=st.GetEntry(ptype,Xm,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjXm(tmp,in._odata[SE->_offset]);
 | 
					      spProjXm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjXm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjXm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
 | 
				
			||||||
    accumReconXm(result,Uchi);
 | 
					    accumReconXm(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Ym
 | 
					  // Ym
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Ym,sF);
 | 
					  SE=st.GetEntry(ptype,Ym,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjYm(tmp,in._odata[SE->_offset]);
 | 
					      spProjYm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjYm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjYm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
 | 
				
			||||||
    accumReconYm(result,Uchi);
 | 
					    accumReconYm(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Zm
 | 
					  // Zm
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Zm,sF);
 | 
					  SE=st.GetEntry(ptype,Zm,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjZm(tmp,in._odata[SE->_offset]);
 | 
					      spProjZm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjZm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjZm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
 | 
				
			||||||
    accumReconZm(result,Uchi);
 | 
					    accumReconZm(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Tm
 | 
					  // Tm
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Tm,sF);
 | 
					  SE=st.GetEntry(ptype,Tm,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjTm(tmp,in._odata[SE->_offset]);
 | 
					      spProjTm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjTm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else { 
 | 
					    } else { 
 | 
				
			||||||
 | 
					      spProjTm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
 | 
				
			||||||
    accumReconTm(result,Uchi);
 | 
					    accumReconTm(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( local ) {
 | 
				
			||||||
    vstream(out._odata[sF],result*(-0.5));
 | 
					    vstream(out._odata[sF],result*(-0.5));
 | 
				
			||||||
 | 
					  } else if ( num ) { 
 | 
				
			||||||
 | 
					    vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Need controls to do interior, exterior, or both
 | 
				
			||||||
template<class Impl> 
 | 
					template<class Impl> 
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
					   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					      int sF,int sU,const FermionField &in, FermionField &out)
 | 
										   int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  SiteHalfSpinor  tmp;    
 | 
					  SiteHalfSpinor  tmp;    
 | 
				
			||||||
  SiteHalfSpinor  chi;    
 | 
					  SiteHalfSpinor  chi;    
 | 
				
			||||||
  SiteSpinor result;
 | 
					 | 
				
			||||||
  SiteHalfSpinor Uchi;
 | 
					  SiteHalfSpinor Uchi;
 | 
				
			||||||
 | 
					  SiteSpinor result;
 | 
				
			||||||
  StencilEntry *SE;
 | 
					  StencilEntry *SE;
 | 
				
			||||||
  int ptype;
 | 
					  int ptype;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int num = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  result=zero;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Xp
 | 
					  // Xp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Xm,sF);
 | 
					  SE=st.GetEntry(ptype,Xm,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) { 
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjXp(tmp,in._odata[SE->_offset]);
 | 
					      spProjXp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjXp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjXp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) { 
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
 | 
					 | 
				
			||||||
  spReconXp(result,Uchi);
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
 | 
				
			||||||
 | 
					    accumReconXp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Yp
 | 
					  // Yp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Ym,sF);
 | 
					  SE=st.GetEntry(ptype,Ym,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) { 
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjYp(tmp,in._odata[SE->_offset]);
 | 
					      spProjYp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjYp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjYp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) { 
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
 | 
				
			||||||
    accumReconYp(result,Uchi);
 | 
					    accumReconYp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Zp
 | 
					  // Zp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Zm,sF);
 | 
					  SE=st.GetEntry(ptype,Zm,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) { 
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjZp(tmp,in._odata[SE->_offset]);
 | 
					      spProjZp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjZp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjZp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) { 
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
 | 
				
			||||||
    accumReconZp(result,Uchi);
 | 
					    accumReconZp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Tp
 | 
					  // Tp
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Tm,sF);
 | 
					  SE=st.GetEntry(ptype,Tm,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjTp(tmp,in._odata[SE->_offset]);
 | 
					      spProjTp(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjTp(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjTp(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
 | 
				
			||||||
    accumReconTp(result,Uchi);
 | 
					    accumReconTp(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Xm
 | 
					  // Xm
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Xp,sF);
 | 
					  SE=st.GetEntry(ptype,Xp,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjXm(tmp,in._odata[SE->_offset]);
 | 
					      spProjXm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjXm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjXm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
 | 
				
			||||||
    accumReconXm(result,Uchi);
 | 
					    accumReconXm(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Ym
 | 
					  // Ym
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Yp,sF);
 | 
					  SE=st.GetEntry(ptype,Yp,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjYm(tmp,in._odata[SE->_offset]);
 | 
					      spProjYm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjYm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjYm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
 | 
				
			||||||
    accumReconYm(result,Uchi);
 | 
					    accumReconYm(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Zm
 | 
					  // Zm
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Zp,sF);
 | 
					  SE=st.GetEntry(ptype,Zp,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjZm(tmp,in._odata[SE->_offset]);
 | 
					      spProjZm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjZm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					      spProjZm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
 | 
				
			||||||
    accumReconZm(result,Uchi);
 | 
					    accumReconZm(result,Uchi);
 | 
				
			||||||
 | 
					    num++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  // Tm
 | 
					  // Tm
 | 
				
			||||||
 | 
					  ///////////////////////////
 | 
				
			||||||
  SE=st.GetEntry(ptype,Tp,sF);
 | 
					  SE=st.GetEntry(ptype,Tp,sF);
 | 
				
			||||||
  if (  SE->_is_local && SE->_permute ) {
 | 
					
 | 
				
			||||||
 | 
					  if (local && SE->_is_local ) {
 | 
				
			||||||
 | 
					    if ( SE->_permute ) {
 | 
				
			||||||
      spProjTm(tmp,in._odata[SE->_offset]);
 | 
					      spProjTm(tmp,in._odata[SE->_offset]);
 | 
				
			||||||
      permute(chi,tmp,ptype);
 | 
					      permute(chi,tmp,ptype);
 | 
				
			||||||
  } else if ( SE->_is_local ) {
 | 
					 | 
				
			||||||
    spProjTm(chi,in._odata[SE->_offset]);
 | 
					 | 
				
			||||||
    } else { 
 | 
					    } else { 
 | 
				
			||||||
 | 
					      spProjTm(chi,in._odata[SE->_offset]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( nonlocal && (!SE->_is_local) ) {
 | 
				
			||||||
    chi=buf[SE->_offset];
 | 
					    chi=buf[SE->_offset];
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
 | 
				
			||||||
    Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
 | 
					    Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
 | 
				
			||||||
    accumReconTm(result,Uchi);
 | 
					    accumReconTm(result,Uchi);
 | 
				
			||||||
  
 | 
					    num++;
 | 
				
			||||||
  vstream(out._odata[sF],result*(-0.5));
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( local ) {
 | 
				
			||||||
 | 
					    vstream(out._odata[sF],result*(-0.5));
 | 
				
			||||||
 | 
					  } else if ( num ) { 
 | 
				
			||||||
 | 
					    vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl> 
 | 
					template<class Impl> 
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
					  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
@@ -405,7 +600,7 @@ void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			|||||||
template<class Impl> 
 | 
					template<class Impl> 
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
					      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					      int sF,int sU,const FermionField &in, FermionField &out,uint64_t *p)
 | 
										      int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -48,11 +48,11 @@ namespace Grid {
 | 
				
			|||||||
    public:
 | 
					    public:
 | 
				
			||||||
     void DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
			   int sF,int sU,const FermionField &in, FermionField &out);
 | 
								   int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
     void DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
			      int sF,int sU,const FermionField &in,FermionField &out);
 | 
								      int sF,int sU,const FermionField &in,FermionField &out,bool local= true, bool nonlocal=true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
     void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
@@ -60,15 +60,15 @@ namespace Grid {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
     void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
			      int sF,int sU,const FermionField &in, FermionField &out,uint64_t *);
 | 
								      int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
     void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
			       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
								       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
			       int sF,int sU,const FermionField &in, FermionField &out);
 | 
								       int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
     void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					     void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
				  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
									  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
				  int sF,int sU,const FermionField &in, FermionField &out);
 | 
									  int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
     WilsonKernels(const ImplParams &p= ImplParams());
 | 
					     WilsonKernels(const ImplParams &p= ImplParams());
 | 
				
			||||||
     
 | 
					     
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -312,7 +312,7 @@ namespace QCD {
 | 
				
			|||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
						   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
											   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					       int ss,int sU,const FermionField &in, FermionField &out)
 | 
											   int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //  std::cout << "Hand op Dhop "<<std::endl;
 | 
					  //  std::cout << "Hand op Dhop "<<std::endl;
 | 
				
			||||||
  typedef typename Simd::scalar_type S;
 | 
					  typedef typename Simd::scalar_type S;
 | 
				
			||||||
@@ -556,7 +556,7 @@ void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeF
 | 
				
			|||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
						std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
											std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
						   int ss,int sU,const FermionField &in, FermionField &out)
 | 
											int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  typedef typename Simd::scalar_type S;
 | 
					  typedef typename Simd::scalar_type S;
 | 
				
			||||||
  typedef typename Simd::vector_type V;
 | 
					  typedef typename Simd::vector_type V;
 | 
				
			||||||
@@ -802,7 +802,7 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
 | 
				
			|||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
							     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					       int sF,int sU,const FermionField &in, FermionField &out)
 | 
												     int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -810,7 +810,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,Dou
 | 
				
			|||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
								std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
						   int sF,int sU,const FermionField &in, FermionField &out)
 | 
													int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					  DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -818,7 +818,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,
 | 
				
			|||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
							     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
					       int sF,int sU,const FermionField &in, FermionField &out)
 | 
												     int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					  DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -826,7 +826,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Dou
 | 
				
			|||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
								std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
						   int sF,int sU,const FermionField &in, FermionField &out)
 | 
													int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
					  DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -835,29 +835,29 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
												       int ss,int sU,const FermionField &in, FermionField &out,bool l,bool n);
 | 
				
			||||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
												       int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n);
 | 
				
			||||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
													  int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n);
 | 
				
			||||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
													  int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
													      int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
 | 
				
			||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
													      int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
 | 
				
			||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
														 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
														 int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
 | 
				
			||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
					template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
				
			||||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
														 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
				
			||||||
							  int ss,int sU,const FermionField &in, FermionField &out);
 | 
														 int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}}
 | 
					}}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -45,10 +45,9 @@ template<class Gimpl> class WilsonLoops;
 | 
				
			|||||||
    typedef typename GImpl::GaugeField               GaugeField;	
 | 
					    typedef typename GImpl::GaugeField               GaugeField;	
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Composition with smeared link, bc's etc.. probably need multiple inheritance
 | 
					    // 
 | 
				
			||||||
    // Variable precision "S" and variable Nc
 | 
					 | 
				
			||||||
    template<class S,int Nrepresentation=Nc>
 | 
					    template<class S,int Nrepresentation=Nc>
 | 
				
			||||||
    class PeriodicGaugeImpl { 
 | 
					    class GaugeImplTypes { 
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
      typedef S Simd;
 | 
					      typedef S Simd;
 | 
				
			||||||
@@ -62,6 +61,16 @@ template<class Gimpl> class WilsonLoops;
 | 
				
			|||||||
      typedef Lattice<SiteGaugeLink>                GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
 | 
					      typedef Lattice<SiteGaugeLink>                GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
 | 
				
			||||||
      typedef Lattice<SiteGaugeField>                   GaugeField;
 | 
					      typedef Lattice<SiteGaugeField>                   GaugeField;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Composition with smeared link, bc's etc.. probably need multiple inheritance
 | 
				
			||||||
 | 
					    // Variable precision "S" and variable Nc
 | 
				
			||||||
 | 
					    template<class GimplTypes>
 | 
				
			||||||
 | 
					    class PeriodicGaugeImpl : public GimplTypes  { 
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    INHERIT_GIMPL_TYPES(GimplTypes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    // Support needed for the assembly of loops including all boundary condition effects such as conjugate bcs
 | 
					    // Support needed for the assembly of loops including all boundary condition effects such as conjugate bcs
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -92,38 +101,28 @@ template<class Gimpl> class WilsonLoops;
 | 
				
			|||||||
	return true;
 | 
						return true;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // Composition with smeared link, bc's etc.. probably need multiple inheritance
 | 
					    // Composition with smeared link, bc's etc.. probably need multiple inheritance
 | 
				
			||||||
    // Variable precision "S" and variable Nc
 | 
					    // Variable precision "S" and variable Nc
 | 
				
			||||||
    template<class S,int Nrepresentation=Nc>
 | 
					    template<class GimplTypes>
 | 
				
			||||||
    class ConjugateGaugeImpl { 
 | 
					    class ConjugateGaugeImpl : public GimplTypes { 
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      typedef S Simd;
 | 
					      INHERIT_GIMPL_TYPES(GimplTypes);
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
      template<typename vtype> using iImplGaugeLink          = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
 | 
					 | 
				
			||||||
      template<typename vtype> using iImplGaugeField         = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd  >;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
      typedef iImplGaugeLink    <Simd>           SiteGaugeLink;
 | 
					 | 
				
			||||||
      typedef iImplGaugeField   <Simd>           SiteGaugeField;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
      typedef Lattice<SiteGaugeLink>                GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
 | 
					 | 
				
			||||||
      typedef Lattice<SiteGaugeField>                   GaugeField;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    // Support needed for the assembly of loops including all boundary condition effects such as Gparity.
 | 
					    // Support needed for the assembly of loops including all boundary condition effects such as Gparity.
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    template<class covariant>  static
 | 
					    template<class covariant>  static
 | 
				
			||||||
    Lattice<covariant> CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice<covariant> &field) {
 | 
					    Lattice<covariant> CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice<covariant> &field) {
 | 
				
			||||||
      return GparityBC::CovShiftForward(Link,mu,field);
 | 
					      return ConjugateBC::CovShiftForward(Link,mu,field);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class covariant> static
 | 
					    template<class covariant> static
 | 
				
			||||||
    Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice<covariant> &field) {
 | 
					    Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice<covariant> &field) {
 | 
				
			||||||
      return GparityBC::CovShiftBackward(Link,mu,field);
 | 
					      return ConjugateBC::CovShiftBackward(Link,mu,field);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    static inline
 | 
					    static inline
 | 
				
			||||||
@@ -162,14 +161,17 @@ template<class Gimpl> class WilsonLoops;
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    typedef GaugeImplTypes<vComplex,Nc>     GimplTypesR;
 | 
				
			||||||
 | 
					    typedef GaugeImplTypes<vComplexF,Nc>    GimplTypesF;
 | 
				
			||||||
 | 
					    typedef GaugeImplTypes<vComplexD,Nc>    GimplTypesD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    typedef PeriodicGaugeImpl<vComplex ,Nc> PeriodicGimplR; // Real.. whichever prec
 | 
					    typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
 | 
				
			||||||
    typedef PeriodicGaugeImpl<vComplexF,Nc> PeriodicGimplF; // Float
 | 
					    typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
 | 
				
			||||||
    typedef PeriodicGaugeImpl<vComplexD,Nc> PeriodicGimplD; // Double
 | 
					    typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    typedef ConjugateGaugeImpl<vComplex ,Nc> ConjugateGimplR; // Real.. whichever prec
 | 
					    typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
 | 
				
			||||||
    typedef ConjugateGaugeImpl<vComplexF,Nc> ConjugateGimplF; // Float
 | 
					    typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
 | 
				
			||||||
    typedef ConjugateGaugeImpl<vComplexD,Nc> ConjugateGimplD; // Double
 | 
					    typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -55,7 +55,7 @@ namespace PeriodicBC {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace GparityBC { 
 | 
					namespace ConjugateBC { 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Must give right answers across boundary
 | 
					  // Must give right answers across boundary
 | 
				
			||||||
  //     <----
 | 
					  //     <----
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user