Merge branch 'release/0.10.0'

Merge branch 'release/0.9.0'
Flop cout matches DiRAC-ITT-2020
2025-06-23 02:02:02 +01:00 · 2023-03-29 16:35:33 -04:00 · 2023-03-29 15:27:58 -04:00 · 2020-11-16 17:15:34 +01:00
7 changed files with 297 additions and 59 deletions
--- a/Grid/communicator/Communicator_none.cc
+++ b/Grid/communicator/Communicator_none.cc
@ -128,7 +128,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 							 int recv_from_rank,int dor,
 							 int xbytes,int rbytes, int dir)
 {
-  return xbytes+rbytes;
+  return 2.0*bytes;
 }
 void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
 {
--- a/Grid/communicator/SharedMemory.cc
+++ b/Grid/communicator/SharedMemory.cc
@ -91,59 +91,6 @@ void *SharedMemory::ShmBufferSelf(void)
  //std::cerr << "ShmBufferSelf "<<ShmRank<<" "<<std::hex<< ShmCommBufs[ShmRank] <<std::dec<<std::endl;
  return ShmCommBufs[ShmRank];
 }
-static inline int divides(int a,int b)
-{
-  return ( b == ( (b/a)*a ) );
-}
-void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims)
-{
-  ////////////////////////////////////////////////////////////////
-  // Allow user to configure through environment variable
-  ////////////////////////////////////////////////////////////////
-  char* str = getenv(("GRID_SHM_DIMS_" + std::to_string(ShmDims.size())).c_str());
-  if ( str ) {
-    std::vector<int> IntShmDims;
-    GridCmdOptionIntVector(std::string(str),IntShmDims);
-    assert(IntShmDims.size() == WorldDims.size());
-    long ShmSize = 1;
-    for (int dim=0;dim<WorldDims.size();dim++) {
-      ShmSize *= (ShmDims[dim] = IntShmDims[dim]);
-      assert(divides(ShmDims[dim],WorldDims[dim]));
-    }
-    assert(ShmSize == WorldShmSize);
-    return;
-  }
-  
-  ////////////////////////////////////////////////////////////////
-  // Powers of 2,3,5 only in prime decomposition for now
-  ////////////////////////////////////////////////////////////////
-  int ndimension = WorldDims.size();
-  ShmDims=Coordinate(ndimension,1);
-
-  std::vector<int> primes({2,3,5});
-
-  int dim = 0;
-  int last_dim = ndimension - 1;
-  int AutoShmSize = 1;
-  while(AutoShmSize != WorldShmSize) {
-    int p;
-    for(p=0;p<primes.size();p++) {
-      int prime=primes[p];
-      if ( divides(prime,WorldDims[dim]/ShmDims[dim])
-        && divides(prime,WorldShmSize/AutoShmSize)  ) {
-  AutoShmSize*=prime;
-  ShmDims[dim]*=prime;
-  last_dim = dim;
-  break;
-      }
-    }
-    if (p == primes.size() && last_dim == dim) {
-      std::cerr << "GlobalSharedMemory::GetShmDims failed" << std::endl;
-      exit(EXIT_FAILURE);
-    }
-    dim=(dim+1) %ndimension;
-  }
-}

 NAMESPACE_END(Grid); 

--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@ -174,6 +174,55 @@ static inline int divides(int a,int b)
 {
  return ( b == ( (b/a)*a ) );
 }
+void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims)
+{
+  ////////////////////////////////////////////////////////////////
+  // Allow user to configure through environment variable
+  ////////////////////////////////////////////////////////////////
+  char* str = getenv(("GRID_SHM_DIMS_" + std::to_string(ShmDims.size())).c_str());
+  if ( str ) {
+    std::vector<int> IntShmDims;
+    GridCmdOptionIntVector(std::string(str),IntShmDims);
+    assert(IntShmDims.size() == WorldDims.size());
+    long ShmSize = 1;
+    for (int dim=0;dim<WorldDims.size();dim++) {
+      ShmSize *= (ShmDims[dim] = IntShmDims[dim]);
+      assert(divides(ShmDims[dim],WorldDims[dim]));
+    }
+    assert(ShmSize == WorldShmSize);
+    return;
+  }
+  
+  ////////////////////////////////////////////////////////////////
+  // Powers of 2,3,5 only in prime decomposition for now
+  ////////////////////////////////////////////////////////////////
+  int ndimension = WorldDims.size();
+  ShmDims=Coordinate(ndimension,1);
+
+  std::vector<int> primes({2,3,5});
+
+  int dim = 0;
+  int last_dim = ndimension - 1;
+  int AutoShmSize = 1;
+  while(AutoShmSize != WorldShmSize) {
+    int p;
+    for(p=0;p<primes.size();p++) {
+      int prime=primes[p];
+      if ( divides(prime,WorldDims[dim]/ShmDims[dim])
+        && divides(prime,WorldShmSize/AutoShmSize)  ) {
+	AutoShmSize*=prime;
+	ShmDims[dim]*=prime;
+	last_dim = dim;
+	break;
+      }
+    }
+    if (p == primes.size() && last_dim == dim) {
+      std::cerr << "GlobalSharedMemory::GetShmDims failed" << std::endl;
+      exit(EXIT_FAILURE);
+    }
+    dim=(dim+1) %ndimension;
+  }
+}
 void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
 {
  ////////////////////////////////////////////////////////////////
--- a/Grid/stencil/Stencil.h
+++ b/Grid/stencil/Stencil.h
@ -434,7 +434,6 @@ public:
  ////////////////////////////////////////////////////////////////////////
  void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
  {
-    accelerator_barrier();
    for(int i=0;i<Packets.size();i++){
      _grid->StencilSendToRecvFromBegin(MpiReqs,
 					Packets[i].send_buf,
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@ -458,8 +458,7 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
 // Common on all GPU targets
 //////////////////////////////////////////////
 #if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP)
-// FIXME -- the non-blocking nature got broken March 30 2023 by PAB
-#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );  
+#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );

 #define accelerator_for( iter, num, nsimd, ... )		\
  accelerator_forNB(iter, num, nsimd, { __VA_ARGS__ } );	\
@ -526,7 +525,7 @@ inline void acceleratorFreeCpu  (void *ptr){free(ptr);};
 //////////////////////////////////////////////

 #ifdef GRID_SYCL
-inline void acceleratorFenceComputeStream(void){ theGridAccelerator->submit_barrier();};
+inline void acceleratorFenceComputeStream(void){ accelerator_barrier();};
 #else
 // Ordering within a stream guaranteed on Nvidia & AMD
 inline void acceleratorFenceComputeStream(void){ };
--- a/tests/core/Test_compact_wilson_clover_speedup.cc
+++ b/tests/core/Test_compact_wilson_clover_speedup.cc
@ -53,7 +53,7 @@ static int readInt(int* argc, char*** argv, std::string&& option, int defaultVal

 static float readFloat(int* argc, char*** argv, std::string&& option, float defaultValue) {
  std::string arg;
-  double      ret = defaultValue;
+  float       ret = defaultValue;
  if(checkPresent(argc, argv, option)) {
    arg = getContent(argc, argv, option);
    GridCmdOptionFloat(arg, ret);
--- a/tests/core/Test_fft_matt.cc
+++ b/tests/core/Test_fft_matt.cc
@ -0,0 +1,244 @@
+    /*************************************************************************************
+
+Gamma::Algebra Gmu [] = {
+  Gamma::Algebra::GammaX,
+  Gamma::Algebra::GammaY,
+  Gamma::Algebra::GammaZ,
+  Gamma::Algebra::GammaT,
+  Gamma::Algebra::Gamma5
+};
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  Coordinate latt_size   = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
+  Coordinate mpi_layout  = GridDefaultMpi();
+
+  int vol = 1;
+  for(int d=0;d<latt_size.size();d++){
+    vol = vol * latt_size[d];
+  }
+  GridCartesian         GRID(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian RBGRID(&GRID);
+
+  LatticeComplexD    coor(&GRID);
+
+  ComplexD ci(0.0,1.0);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridSerialRNG          sRNG;  sRNG.SeedFixedIntegers(seeds); // naughty seeding
+  GridParallelRNG          pRNG(&GRID);
+  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeGaugeFieldD Umu(&GRID);
+  SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
+
+  ////////////////////////////////////////////////////
+  // Wilson test
+  ////////////////////////////////////////////////////
+  {
+    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
+    LatticeFermionD    src_p(&GRID);
+    LatticeFermionD    tmp(&GRID);
+    LatticeFermionD    ref(&GRID);
+    LatticeFermionD    result(&GRID);
+    
+    RealD mass=0.1;
+    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
+    
+    Dw.M(src,ref);
+    std::cout << "Norm src "<<norm2(src)<<std::endl;
+    std::cout << "Norm Dw x src "<<norm2(ref)<<std::endl;
+    {
+      FFT theFFT(&GRID);
+
+      ////////////////
+      // operator in Fourier space
+      ////////////////
+      tmp =ref;
+      theFFT.FFT_all_dim(result,tmp,FFT::forward);
+      std::cout<<"FFT[ Dw x src ]  "<< norm2(result)<<std::endl;    
+
+      tmp = src;
+      theFFT.FFT_all_dim(src_p,tmp,FFT::forward);
+      std::cout<<"FFT[ src      ]  "<< norm2(src_p)<<std::endl;
+      
+      /////////////////////////////////////////////////////////////////
+      // work out the predicted FT from Fourier
+      /////////////////////////////////////////////////////////////////
+      auto FGrid = &GRID;
+      LatticeFermionD    Kinetic(FGrid); Kinetic = Zero();
+      LatticeComplexD    kmu(FGrid); 
+      LatticeInteger     scoor(FGrid); 
+      LatticeComplexD    sk (FGrid); sk = Zero();
+      LatticeComplexD    sk2(FGrid); sk2= Zero();
+      LatticeComplexD    W(FGrid); W= Zero();
+      LatticeComplexD    one(FGrid); one =ComplexD(1.0,0.0);
+      ComplexD ci(0.0,1.0);
+    
+      for(int mu=0;mu<Nd;mu++) {
+	
+	RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
+
+	LatticeCoordinate(kmu,mu);
+
+	kmu = TwoPiL * kmu;
+      
+	sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
+	sk  = sk  +     sin(kmu)    *sin(kmu); 
+      
+	// -1/2 Dw ->  1/2 gmu (eip - emip) = i sinp gmu
+	Kinetic = Kinetic + sin(kmu)*ci*(Gamma(Gmu[mu])*src_p);
+	
+      }
+    
+      W = mass + sk2; 
+      Kinetic = Kinetic + W * src_p;
+    
+      std::cout<<"Momentum space src         "<< norm2(src_p)<<std::endl;
+      std::cout<<"Momentum space Dw x src    "<< norm2(Kinetic)<<std::endl;
+      std::cout<<"FT[Coordinate space Dw]    "<< norm2(result)<<std::endl;
+    
+      result = result - Kinetic;
+      std::cout<<"diff "<< norm2(result)<<std::endl;
+      
+    }
+
+    std::cout << " =======================================" <<std::endl;
+    std::cout << " Checking FourierFreePropagator x Dw = 1" <<std::endl;
+    std::cout << " =======================================" <<std::endl;
+    std::cout << "Dw src = " <<norm2(src)<<std::endl;
+    std::cout << "Dw tmp = " <<norm2(tmp)<<std::endl;
+    Dw.M(src,tmp);
+
+    Dw.FreePropagator(tmp,ref,mass);
+
+    std::cout << "Dw ref = " <<norm2(ref)<<std::endl;
+    
+    ref = ref - src;
+    
+    std::cout << "Dw ref-src = " <<norm2(ref)<<std::endl;
+  }
+
+
+  ////////////////////////////////////////////////////
+  // Wilson prop
+  ////////////////////////////////////////////////////
+  {
+    std::cout<<"****************************************"<<std::endl;
+    std::cout << "Wilson Mom space 4d propagator \n";
+    std::cout<<"****************************************"<<std::endl;
+
+    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
+    LatticeFermionD    tmp(&GRID);
+    LatticeFermionD    ref(&GRID);
+    LatticeFermionD    diff(&GRID);
+
+    src=Zero();
+    Coordinate point(4,0); // 0,0,0,0
+    SpinColourVectorD ferm;
+    ferm=Zero();
+    ferm()(0)(0) = ComplexD(1.0);
+    pokeSite(ferm,src,point);
+
+    RealD mass=0.1;
+
+    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
+
+    // Momentum space prop
+    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
+    Dw.FreePropagator(src,ref,mass) ;
+
+    Gamma G5(Gamma::Algebra::Gamma5);
+
+    LatticeFermionD    result(&GRID); 
+    const int sdir=0;
+    
+    ////////////////////////////////////////////////////////////////////////
+    // Conjugate gradient on normal equations system
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
+    Dw.Mdag(src,tmp);
+    src=tmp;
+    MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
+    ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
+    CG(HermOp,src,result);
+    
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Taking difference" <<std::endl;
+    std::cout << "Dw result "<<norm2(result)<<std::endl;
+    std::cout << "Dw ref     "<<norm2(ref)<<std::endl;
+    
+    diff = ref - result;
+    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
+
+    DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
+  }
+
+  ////////////////////////////////////////////////////
+  //Gauge invariance test
+  ////////////////////////////////////////////////////
+  {
+    std::cout<<"****************************************"<<std::endl;
+    std::cout << "Gauge invariance test \n";
+    std::cout<<"****************************************"<<std::endl;
+    LatticeGaugeField     U_GT(&GRID); // Gauge transformed field
+    LatticeColourMatrix   g(&GRID);    // local Gauge xform matrix
+    U_GT = Umu;
+    // Make a random xform to the gauge field
+    SU<Nc>::RandomGaugeTransform(pRNG,U_GT,g); // Unit gauge
+
+    LatticeFermionD    src(&GRID);
+    LatticeFermionD    tmp(&GRID);
+    LatticeFermionD    ref(&GRID);
+    LatticeFermionD    diff(&GRID);
+
+    // could loop over colors
+    src=Zero();
+    Coordinate point(4,0); // 0,0,0,0
+    SpinColourVectorD ferm;
+    ferm=Zero();
+    ferm()(0)(0) = ComplexD(1.0);
+    pokeSite(ferm,src,point);
+
+    RealD mass=0.1;
+    WilsonFermionD Dw(U_GT,GRID,RBGRID,mass);
+
+    // Momentum space prop
+    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
+    Dw.FreePropagator(src,ref,mass) ;
+
+    Gamma G5(Gamma::Algebra::Gamma5);
+
+    LatticeFermionD    result(&GRID); 
+    const int sdir=0;
+    
+    ////////////////////////////////////////////////////////////////////////
+    // Conjugate gradient on normal equations system
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
+    Dw.Mdag(src,tmp);
+    src=tmp;
+    MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
+    ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
+    CG(HermOp,src,result);
+    
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Taking difference" <<std::endl;
+    std::cout << "Dw result "<<norm2(result)<<std::endl;
+    std::cout << "Dw ref     "<<norm2(ref)<<std::endl;
+    
+    diff = ref - result;
+    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
+
+    DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
+  }
+  
+  
+  Grid_finalize();
+}
Author	SHA1	Message	Date
Peter Boyle	12d20d8e15	Merge branch 'release/0.10.0'	2023-03-29 16:35:33 -04:00
Peter Boyle	25777e5967	Merge branch 'release/0.9.0'	2023-03-29 15:27:58 -04:00
Peter Boyle	deab11e68b	Flop cout matches DiRAC-ITT-2020	2020-11-16 17:15:34 +01:00