mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Adding persistent communicators
This commit is contained in:
		@@ -196,5 +196,126 @@ int main (int argc, char ** argv)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  for(int lat=4;lat<=32;lat+=2){
 | 
			
		||||
    for(int Ls=1;Ls<=16;Ls*=2){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      int ncomm;
 | 
			
		||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      std::vector<CartesianCommunicator::CommsRequest_t> empty;
 | 
			
		||||
      std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_fwd(Nd,empty);
 | 
			
		||||
      std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_bwd(Nd,empty);
 | 
			
		||||
 | 
			
		||||
      for(int mu=0;mu<4;mu++){
 | 
			
		||||
	ncomm=0;
 | 
			
		||||
	if (mpi_layout[mu]>1 ) {
 | 
			
		||||
	  ncomm++;
 | 
			
		||||
 | 
			
		||||
	  int comm_proc;
 | 
			
		||||
	  int xmit_to_rank;
 | 
			
		||||
	  int recv_from_rank;
 | 
			
		||||
 | 
			
		||||
	  comm_proc=1;
 | 
			
		||||
	  Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
			
		||||
	  Grid.SendToRecvFromInit(requests_fwd[mu],
 | 
			
		||||
				  (void *)&xbuf[mu][0],
 | 
			
		||||
				  xmit_to_rank,
 | 
			
		||||
				  (void *)&rbuf[mu][0],
 | 
			
		||||
				  recv_from_rank,
 | 
			
		||||
				  bytes);
 | 
			
		||||
 | 
			
		||||
	  comm_proc = mpi_layout[mu]-1;
 | 
			
		||||
	  Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
			
		||||
	  Grid.SendToRecvFromInit(requests_bwd[mu],
 | 
			
		||||
				  (void *)&xbuf[mu+4][0],
 | 
			
		||||
				  xmit_to_rank,
 | 
			
		||||
				  (void *)&rbuf[mu+4][0],
 | 
			
		||||
				  recv_from_rank,
 | 
			
		||||
				  bytes);
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      {
 | 
			
		||||
	double start=usecond();
 | 
			
		||||
	for(int i=0;i<Nloop;i++){
 | 
			
		||||
	  
 | 
			
		||||
	  for(int mu=0;mu<4;mu++){
 | 
			
		||||
	    
 | 
			
		||||
	    if (mpi_layout[mu]>1 ) {
 | 
			
		||||
	      
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_bwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_bwd[mu]);
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  Grid.Barrier();
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	double stop=usecond();
 | 
			
		||||
	
 | 
			
		||||
	double dbytes    = bytes;
 | 
			
		||||
	double xbytes    = Nloop*dbytes*2.0*ncomm;
 | 
			
		||||
	double rbytes    = xbytes;
 | 
			
		||||
	double bidibytes = xbytes+rbytes;
 | 
			
		||||
	
 | 
			
		||||
	double time = stop-start;
 | 
			
		||||
	
 | 
			
		||||
	std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      {
 | 
			
		||||
	double start=usecond();
 | 
			
		||||
	for(int i=0;i<Nloop;i++){
 | 
			
		||||
	  
 | 
			
		||||
	  for(int mu=0;mu<4;mu++){
 | 
			
		||||
	    
 | 
			
		||||
	    if (mpi_layout[mu]>1 ) {
 | 
			
		||||
	      
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_bwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_bwd[mu]);
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  Grid.Barrier();
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	double stop=usecond();
 | 
			
		||||
	
 | 
			
		||||
	double dbytes    = bytes;
 | 
			
		||||
	double xbytes    = Nloop*dbytes*2.0*ncomm;
 | 
			
		||||
	double rbytes    = xbytes;
 | 
			
		||||
	double bidibytes = xbytes+rbytes;
 | 
			
		||||
	
 | 
			
		||||
	double time = stop-start;
 | 
			
		||||
	
 | 
			
		||||
	std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -37,12 +37,6 @@ namespace Grid {
 | 
			
		||||
  static const int Even   =CbRed;
 | 
			
		||||
  static const int Odd    =CbBlack;
 | 
			
		||||
    
 | 
			
		||||
    // Perhaps these are misplaced and 
 | 
			
		||||
    // should be in sparse matrix.
 | 
			
		||||
    // Also should make these a named enum type
 | 
			
		||||
    static const int DaggerNo=0;
 | 
			
		||||
    static const int DaggerYes=1;
 | 
			
		||||
 | 
			
		||||
// Specialise this for red black grids storing half the data like a chess board.
 | 
			
		||||
class GridRedBlackCartesian : public GridBase
 | 
			
		||||
{
 | 
			
		||||
 
 | 
			
		||||
@@ -127,12 +127,21 @@ class CartesianCommunicator {
 | 
			
		||||
			int recv_from_rank,
 | 
			
		||||
			int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			    void *xmit,
 | 
			
		||||
			    int xmit_to_rank,
 | 
			
		||||
			    void *recv,
 | 
			
		||||
			    int recv_from_rank,
 | 
			
		||||
			    int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			 void *xmit,
 | 
			
		||||
			 int xmit_to_rank,
 | 
			
		||||
			 void *recv,
 | 
			
		||||
			 int recv_from_rank,
 | 
			
		||||
			 int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list);
 | 
			
		||||
    void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -144,7 +144,8 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
					       void *xmit,
 | 
			
		||||
					       int dest,
 | 
			
		||||
					       void *recv,
 | 
			
		||||
@@ -155,14 +156,26 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
  int rank = _processor;
 | 
			
		||||
  int ierr;
 | 
			
		||||
  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
			
		||||
  
 | 
			
		||||
  ierr =MPI_Send_init(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
  ierr|=MPI_Recv_init(recv, bytes, MPI_CHAR,dest,_processor,communicator,&rrq);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
 | 
			
		||||
  list.push_back(xrq);
 | 
			
		||||
  list.push_back(rrq);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Startall(list.size(),&list[0]);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  SendToRecvFromInit(list,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromBegin(list);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  int nreq=list.size();
 | 
			
		||||
 
 | 
			
		||||
@@ -84,6 +84,19 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
 
 | 
			
		||||
@@ -268,6 +268,10 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  assert(0); //unimplemented
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
@@ -280,6 +284,15 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
 | 
			
		||||
  //  shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
 | 
			
		||||
  shmem_putmem(recv,xmit,bytes,dest);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0); // Unimplemented
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  //  shmem_quiet();      // I'm done
 | 
			
		||||
 
 | 
			
		||||
@@ -101,6 +101,7 @@ public:
 | 
			
		||||
    int begin(void) { return 0;};
 | 
			
		||||
    int end(void)   { return _odata.size(); }
 | 
			
		||||
    vobj & operator[](int i) { return _odata[i]; };
 | 
			
		||||
    const vobj & operator[](int i) const { return _odata[i]; };
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
 
 | 
			
		||||
@@ -60,6 +60,12 @@ namespace QCD {
 | 
			
		||||
    static const int SpinIndex   = 1;
 | 
			
		||||
    static const int LorentzIndex= 0;
 | 
			
		||||
 | 
			
		||||
    // Also should make these a named enum type
 | 
			
		||||
    static const int DaggerNo=0;
 | 
			
		||||
    static const int DaggerYes=1;
 | 
			
		||||
    static const int InverseNo=0;
 | 
			
		||||
    static const int InverseYes=1;
 | 
			
		||||
 | 
			
		||||
    // Useful traits is this a spin index
 | 
			
		||||
    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -28,7 +28,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Eigen/Dense>
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
@@ -95,22 +99,6 @@ namespace QCD {
 | 
			
		||||
    FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
    // Assemble Din
 | 
			
		||||
    /*
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	//	Din = bs psi[s] + cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
 | 
			
		||||
	//      Din+= -mass*cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    */
 | 
			
		||||
    Meooe5D(psi,Din);
 | 
			
		||||
 | 
			
		||||
    this->DW(Din,chi,DaggerNo);
 | 
			
		||||
@@ -152,18 +140,6 @@ namespace QCD {
 | 
			
		||||
      // Collect the terms in DW
 | 
			
		||||
      //	Chi = bs Din[s] + cs[s] Din[s+1}
 | 
			
		||||
      //    Chi+= -mass*cs[s] psi[s+1}
 | 
			
		||||
      /*
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pplus (chi,bs[s],Din,-mass*cs[0],Din,s,0);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
      */
 | 
			
		||||
 | 
			
		||||
      // FIXME just call MooeeDag??
 | 
			
		||||
 | 
			
		||||
@@ -193,24 +169,6 @@ namespace QCD {
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
    // Assemble the 5d matrix
 | 
			
		||||
    Meooe5D(psi,tmp); 
 | 
			
		||||
#if 0
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	//	tmp = bs psi[s] + cs[s] psi[s+1}
 | 
			
		||||
	//      tmp+= -mass*cs[s] psi[s+1}
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
 | 
			
		||||
	axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
 | 
			
		||||
	axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
 | 
			
		||||
	axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    // Apply 4d dslash
 | 
			
		||||
    if ( psi.checkerboard == Odd ) {
 | 
			
		||||
@@ -232,24 +190,6 @@ namespace QCD {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    MeooeDag5D(tmp,chi); 
 | 
			
		||||
#if 0
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
 | 
			
		||||
    // Assemble the 5d matrix
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==0 ) {
 | 
			
		||||
	axpby_ssp_pplus(chi,beo[s],tmp,   -ceo[s+1]  ,tmp,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,   1.0,chi,mass*ceo[Ls-1],tmp,s,Ls-1);
 | 
			
		||||
      } else if ( s==(Ls-1)) { 
 | 
			
		||||
	axpby_ssp_pplus(chi,beo[s],tmp,mass*ceo[0],tmp,s,0);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0,chi,-ceo[s-1],tmp,s,s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
	axpby_ssp_pplus(chi,beo[s],tmp,-ceo[s+1],tmp,s,s+1);
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0   ,chi,-ceo[s-1],tmp,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -269,6 +209,10 @@ namespace QCD {
 | 
			
		||||
	axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    FermionField temp(psi._grid);
 | 
			
		||||
    this->MooeeDenseInternal(psi,temp,DaggerNo,InverseNo);
 | 
			
		||||
    temp = temp - chi;
 | 
			
		||||
    std::cout << "Difference between dense and normal "<<  norm2(temp) <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
@@ -311,11 +255,139 @@ namespace QCD {
 | 
			
		||||
	axpby_ssp_pminus(chi,1.0   ,chi,-cee[s-1],psi,s,s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    FermionField temp(psi._grid);
 | 
			
		||||
    this->MooeeDenseInternal(psi,temp,DaggerYes,InverseNo);
 | 
			
		||||
    temp = temp - chi;
 | 
			
		||||
    std::cout << "Difference between dense and normal "<<  norm2(temp) <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    FermionField temp(psi._grid);
 | 
			
		||||
    this->MooeeLDUInv(psi,chi);
 | 
			
		||||
    this->MooeeDenseInv(psi,temp);
 | 
			
		||||
    temp = temp - chi;
 | 
			
		||||
    std::cout << "Difference between dense and normal "<<  norm2(temp) <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    FermionField temp(psi._grid);
 | 
			
		||||
    this->MooeeLDUInvDag(psi,chi);
 | 
			
		||||
    this->MooeeDenseInvDag(psi,temp);
 | 
			
		||||
    temp = temp - chi;
 | 
			
		||||
    std::cout << "Difference between dense and normal "<<  norm2(temp) <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeDenseInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeDenseInternal(psi,chi,DaggerYes,InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeDenseInv(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeDenseInternal(psi,chi,DaggerNo,InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeDenseInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
    int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    assert(Ls==LLs);
 | 
			
		||||
 | 
			
		||||
    Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
    Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      Pplus(s,s) = bee[s];
 | 
			
		||||
      Pminus(s,s)= bee[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<Ls-1;s++){
 | 
			
		||||
      Pminus(s,s+1) = -cee[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<Ls-1;s++){
 | 
			
		||||
      Pplus(s+1,s) = -cee[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << " Pplus  "<<Pplus<<std::endl;
 | 
			
		||||
    std::cout << " Pminus "<<Pminus<<std::endl;
 | 
			
		||||
    Pplus (0,Ls-1) = mass*cee[0];
 | 
			
		||||
    Pminus(Ls-1,0) = mass*cee[Ls-1];
 | 
			
		||||
    
 | 
			
		||||
    Eigen::MatrixXd PplusMat ;
 | 
			
		||||
    Eigen::MatrixXd PminusMat;
 | 
			
		||||
 | 
			
		||||
    if ( inv ) {
 | 
			
		||||
      PplusMat =Pplus.inverse();
 | 
			
		||||
      PminusMat=Pminus.inverse();
 | 
			
		||||
    } else { 
 | 
			
		||||
      PplusMat =Pplus;
 | 
			
		||||
      PminusMat=Pminus;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(dag){
 | 
			
		||||
      PplusMat.adjointInPlace();
 | 
			
		||||
      PminusMat.adjointInPlace();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // For the non-vectorised s-direction this is simple
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(auto site=0;site<vol;site++){
 | 
			
		||||
 | 
			
		||||
      SiteSpinor     SiteChi;
 | 
			
		||||
      SiteHalfSpinor SitePplus;
 | 
			
		||||
      SiteHalfSpinor SitePminus;
 | 
			
		||||
 | 
			
		||||
      for(int s1=0;s1<Ls;s1++){
 | 
			
		||||
	SiteChi =zero;
 | 
			
		||||
	for(int s2=0;s2<Ls;s2++){
 | 
			
		||||
	  int lex2 = s2+Ls*site;
 | 
			
		||||
 | 
			
		||||
	  if ( PplusMat(s1,s2) != 0.0 ) {
 | 
			
		||||
	    spProj5p(SitePplus,psi[lex2]);
 | 
			
		||||
	    accumRecon5p(SiteChi,PplusMat (s1,s2)*SitePplus);
 | 
			
		||||
	  }
 | 
			
		||||
	  
 | 
			
		||||
	  if ( PminusMat(s1,s2) != 0.0 ) {
 | 
			
		||||
	    spProj5m(SitePminus,psi[lex2]);
 | 
			
		||||
	    accumRecon5m(SiteChi,PminusMat(s1,s2)*SitePminus);
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	chi[s1+Ls*site] = SiteChi;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    // For the non-vectorised s-direction we have more work to do in an alternate implementation
 | 
			
		||||
    // 
 | 
			
		||||
    //  ai = Mij bj
 | 
			
		||||
    //
 | 
			
		||||
    //  where b == {b0} {b1}.. 
 | 
			
		||||
    //
 | 
			
		||||
    //  Best to load/bcast b0
 | 
			
		||||
    //  
 | 
			
		||||
    //   ai  = Mi0 b0
 | 
			
		||||
    //   for(int i=0;i<Ls;i++){
 | 
			
		||||
    //     ai += Mij bj
 | 
			
		||||
    //   }
 | 
			
		||||
    //
 | 
			
		||||
    //   For rb5d shamir DWF, the Moo is proportional to the identity
 | 
			
		||||
    //   For rb4d Moo, we need to set a vector of coeffs, and use a stencil type construct, which is cheap.
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeLDUInv    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    chi.checkerboard=psi.checkerboard;
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
 | 
			
		||||
@@ -340,8 +412,9 @@ namespace QCD {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  void CayleyFermion5D<Impl>::MooeeLDUInvDag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    chi.checkerboard=psi.checkerboard;
 | 
			
		||||
    int Ls=this->Ls;
 | 
			
		||||
    // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
    axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
 | 
			
		||||
@@ -364,6 +437,7 @@ namespace QCD {
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // force terms; five routines; default to Dhop on diagonal
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
@@ -416,7 +490,6 @@ namespace QCD {
 | 
			
		||||
  void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
 | 
			
		||||
  {
 | 
			
		||||
    SetCoefficientsZolotarev(1.0,zdata,b,c);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  //Zolo
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
@@ -524,8 +597,11 @@ namespace QCD {
 | 
			
		||||
      for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
 | 
			
		||||
      dee[Ls-1] += delta_d;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  FermOpTemplateInstantiate(CayleyFermion5D);
 | 
			
		||||
  GparityFermOpTemplateInstantiate(CayleyFermion5D);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -51,6 +51,11 @@ namespace Grid {
 | 
			
		||||
      virtual void   MooeeDag    (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeInv    (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeInvDag (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeLDUInv    (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeLDUInvDag (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeDenseInv    (const FermionField &in, FermionField &out);
 | 
			
		||||
      virtual void   MooeeDenseInvDag (const FermionField &in, FermionField &out);
 | 
			
		||||
      void   MooeeDenseInternal(const FermionField &in, FermionField &out,int dag,int inv);
 | 
			
		||||
      virtual void   Instantiatable(void)=0;
 | 
			
		||||
 | 
			
		||||
      // force terms; five routines; default to Dhop on diagonal
 | 
			
		||||
@@ -94,6 +99,7 @@ namespace Grid {
 | 
			
		||||
		      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
		      RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    protected:
 | 
			
		||||
      void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
 | 
			
		||||
      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
 | 
			
		||||
 
 | 
			
		||||
@@ -20,10 +20,10 @@ $(top_srcdir)/prerequisites/$(FFTWTAR):
 | 
			
		||||
 | 
			
		||||
Eigen:  $(top_srcdir)/prerequisites/$(EIGENTAR)
 | 
			
		||||
	tar xvf $(top_srcdir)/prerequisites/$(EIGENTAR)
 | 
			
		||||
	- rm -rf  ../include/Grid/Eigen
 | 
			
		||||
	- rm -rf  $(top_srcdir)/lib/Eigen
 | 
			
		||||
	mv eigen-eigen*/Eigen .
 | 
			
		||||
	echo EFILES=`find Eigen -type f -name '*.h' ` > $(top_srcdir)/lib/Eigen.inc
 | 
			
		||||
	mv Eigen ../include/Grid/
 | 
			
		||||
	mv Eigen $(top_srcdir)/lib/
 | 
			
		||||
	touch Eigen
 | 
			
		||||
 | 
			
		||||
FFTW: $(top_srcdir)/prerequisites/$(FFTWTAR)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,9 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
SUBDIRS = core
 | 
			
		||||
#SUBDIRS = core
 | 
			
		||||
 | 
			
		||||
# Uncomment to enable complete test suite build
 | 
			
		||||
#SUBDIRS = core forces hmc solver debug	
 | 
			
		||||
SUBDIRS = core forces hmc solver debug	
 | 
			
		||||
 | 
			
		||||
if BUILD_CHROMA_REGRESSION
 | 
			
		||||
  SUBDIRS+= qdpxx
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user