mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Merge branch 'develop' into release/v0.6.0
# Conflicts: # tests/core/Test_fft_gfix.cc
This commit is contained in:
		@@ -3,7 +3,7 @@ SUBDIRS = lib benchmarks tests
 | 
			
		||||
 | 
			
		||||
.PHONY: tests
 | 
			
		||||
 | 
			
		||||
tests:
 | 
			
		||||
tests: all
 | 
			
		||||
	$(MAKE) -C tests tests
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS += -I$(top_builddir)/include
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										44
									
								
								README
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								README
									
									
									
									
									
								
							@@ -1,44 +0,0 @@
 | 
			
		||||
This library provides data parallel C++ container classes with internal memory layout
 | 
			
		||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
 | 
			
		||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
 | 
			
		||||
array indices to both MPI tasks and SIMD processing elements.
 | 
			
		||||
 | 
			
		||||
* Identically shaped arrays then be processed with perfect data parallelisation.
 | 
			
		||||
* Such identically shapped arrays are called conformable arrays.
 | 
			
		||||
 | 
			
		||||
The transformation is based on the observation that Cartesian array processing involves
 | 
			
		||||
identical processing to be performed on different regions of the Cartesian array.
 | 
			
		||||
 | 
			
		||||
The library will (eventually) both geometrically decompose into MPI tasks and across SIMD lanes.
 | 
			
		||||
 | 
			
		||||
Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but
 | 
			
		||||
optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification
 | 
			
		||||
for most programmers.
 | 
			
		||||
 | 
			
		||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
 | 
			
		||||
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
 | 
			
		||||
 | 
			
		||||
These are presented as 
 | 
			
		||||
 | 
			
		||||
  vRealF, vRealD, vComplexF, vComplexD 
 | 
			
		||||
 | 
			
		||||
internal vector data types. These may be useful in themselves for other programmers.
 | 
			
		||||
The corresponding scalar types are named
 | 
			
		||||
 | 
			
		||||
  RealF, RealD, ComplexF, ComplexD
 | 
			
		||||
 | 
			
		||||
MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is present in the library.
 | 
			
		||||
 | 
			
		||||
   You can give `configure' initial values for configuration parameters
 | 
			
		||||
by setting variables in the command line or in the environment.  Here
 | 
			
		||||
is are examples:
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
 | 
			
		||||
     
 | 
			
		||||
     
 | 
			
		||||
@@ -126,7 +126,7 @@ If you want to build all the tests at once just use `make tests`.
 | 
			
		||||
 | 
			
		||||
### Possible communication interfaces
 | 
			
		||||
 | 
			
		||||
The following options can be use with the `--enable-simd=` option to target different communication interfaces:
 | 
			
		||||
The following options can be use with the `--enable-comms=` option to target different communication interfaces:
 | 
			
		||||
 | 
			
		||||
| `<comm>`       | Description                                                   |
 | 
			
		||||
| -------------- | ------------------------------------------------------------- |
 | 
			
		||||
 
 | 
			
		||||
@@ -30,8 +30,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#define _GRID_FFT_H_
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_FFTW
 | 
			
		||||
#ifdef USE_MKL
 | 
			
		||||
#include <fftw/fftw3.h>
 | 
			
		||||
#else
 | 
			
		||||
#include <fftw3.h>
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -42,7 +42,7 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
  static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
//      ImplComplex cmi(0.0,-1.0);
 | 
			
		||||
      ComplexD cmi(0.0,-1.0);
 | 
			
		||||
      Complex cmi(0.0,-1.0);
 | 
			
		||||
      A[mu] = Ta(U[mu]) * cmi;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
@@ -52,13 +52,13 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
      dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
 | 
			
		||||
    }
 | 
			
		||||
  }  
 | 
			
		||||
  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,RealD & alpha,int maxiter,RealD Omega_tol, RealD Phi_tol) {
 | 
			
		||||
  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol) {
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    RealD org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
 | 
			
		||||
    RealD org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
 | 
			
		||||
    RealD old_trace = org_link_trace;
 | 
			
		||||
    RealD trG;
 | 
			
		||||
    Real org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
 | 
			
		||||
    Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
 | 
			
		||||
    Real old_trace = org_link_trace;
 | 
			
		||||
    Real trG;
 | 
			
		||||
 | 
			
		||||
    std::vector<GaugeMat> U(Nd,grid);
 | 
			
		||||
                 GaugeMat dmuAmu(grid);
 | 
			
		||||
@@ -71,13 +71,13 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
      // Monitor progress and convergence test 
 | 
			
		||||
      // infrequently to minimise cost overhead
 | 
			
		||||
      if ( i %20 == 0 ) { 
 | 
			
		||||
	RealD plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
 | 
			
		||||
	RealD link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
 | 
			
		||||
	Real plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
 | 
			
		||||
	Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
 | 
			
		||||
 | 
			
		||||
	std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
 | 
			
		||||
	
 | 
			
		||||
	RealD Phi  = 1.0 - old_trace / link_trace ;
 | 
			
		||||
	RealD Omega= 1.0 - trG;
 | 
			
		||||
	Real Phi  = 1.0 - old_trace / link_trace ;
 | 
			
		||||
	Real Omega= 1.0 - trG;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl;
 | 
			
		||||
@@ -91,7 +91,7 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  static RealD SteepestDescentStep(std::vector<GaugeMat> &U,RealD & alpha, GaugeMat & dmuAmu) {
 | 
			
		||||
  static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
 | 
			
		||||
    GridBase *grid = U[0]._grid;
 | 
			
		||||
 | 
			
		||||
    std::vector<GaugeMat> A(Nd,grid);
 | 
			
		||||
@@ -101,19 +101,19 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
    ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    RealD vol = grid->gSites();
 | 
			
		||||
    RealD trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
 | 
			
		||||
    Real vol = grid->gSites();
 | 
			
		||||
    Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
 | 
			
		||||
 | 
			
		||||
    SU<Nc>::GaugeTransform(U,g);
 | 
			
		||||
 | 
			
		||||
    return trG;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static RealD FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,RealD & alpha, GaugeMat & dmuAmu) {
 | 
			
		||||
  static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = U[0]._grid;
 | 
			
		||||
 | 
			
		||||
    RealD vol = grid->gSites();
 | 
			
		||||
    Real vol = grid->gSites();
 | 
			
		||||
 | 
			
		||||
    FFT theFFT((GridCartesian *)grid);
 | 
			
		||||
 | 
			
		||||
@@ -139,13 +139,13 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
    std::vector<int> coor(grid->_ndimension,0);
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++) {
 | 
			
		||||
 | 
			
		||||
      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
      LatticeCoordinate(pmu,mu);
 | 
			
		||||
      pmu = TwoPiL * pmu ;
 | 
			
		||||
      psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5); 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ComplexD psqMax(16.0);
 | 
			
		||||
    Complex psqMax(16.0);
 | 
			
		||||
    Fp =  psqMax*one/psq;
 | 
			
		||||
 | 
			
		||||
    static int once;
 | 
			
		||||
@@ -160,20 +160,20 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
    theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);
 | 
			
		||||
 | 
			
		||||
    GaugeMat ciadmam(grid);
 | 
			
		||||
    ComplexD cialpha(0.0,-alpha);
 | 
			
		||||
    Complex cialpha(0.0,-alpha);
 | 
			
		||||
    ciadmam = dmuAmu*cialpha;
 | 
			
		||||
    SU<Nc>::taExp(ciadmam,g);
 | 
			
		||||
 | 
			
		||||
    RealD trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
 | 
			
		||||
    Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
 | 
			
		||||
 | 
			
		||||
    SU<Nc>::GaugeTransform(U,g);
 | 
			
		||||
 | 
			
		||||
    return trG;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,RealD & alpha, GaugeMat &dmuAmu) {
 | 
			
		||||
  static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
 | 
			
		||||
    GridBase *grid = g._grid;
 | 
			
		||||
    ComplexD cialpha(0.0,-alpha);
 | 
			
		||||
    Complex cialpha(0.0,-alpha);
 | 
			
		||||
    GaugeMat ciadmam(grid);
 | 
			
		||||
    DmuAmu(A,dmuAmu);
 | 
			
		||||
    ciadmam = dmuAmu*cialpha;
 | 
			
		||||
@@ -193,11 +193,11 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
    ComplexField  pha(grid);
 | 
			
		||||
    GaugeMat      Apha(grid);
 | 
			
		||||
 | 
			
		||||
    ComplexD ci(0.0,1.0);
 | 
			
		||||
    Complex ci(0.0,1.0);
 | 
			
		||||
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
 | 
			
		||||
      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
      LatticeCoordinate(pmu,mu);
 | 
			
		||||
      pmu = TwoPiL * pmu ;
 | 
			
		||||
      pha = exp(pmu *  (0.5 *ci)); // e(ipmu/2) since Amu(x+mu/2)
 | 
			
		||||
@@ -213,14 +213,14 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
 | 
			
		||||
    ComplexField  pmu(grid);
 | 
			
		||||
    ComplexField  pha(grid);
 | 
			
		||||
    ComplexD ci(0.0,1.0);
 | 
			
		||||
    Complex ci(0.0,1.0);
 | 
			
		||||
    
 | 
			
		||||
    // Sign convention for FFTW calls:
 | 
			
		||||
    // A(x)= Sum_p e^ipx A(p) / V
 | 
			
		||||
    // A(p)= Sum_p e^-ipx A(x)
 | 
			
		||||
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
      LatticeCoordinate(pmu,mu);
 | 
			
		||||
      pmu = TwoPiL * pmu ;
 | 
			
		||||
      pha = exp(-pmu *  (0.5 *ci)); // e(+ipmu/2) since Amu(x+mu/2)
 | 
			
		||||
@@ -241,7 +241,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt_size   = GridDefaultLatt();
 | 
			
		||||
  std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
 | 
			
		||||
  std::vector<int> simd_layout( { vComplex::Nsimd(),1,1,1});
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
 | 
			
		||||
  int vol = 1;
 | 
			
		||||
@@ -270,16 +270,16 @@ int main (int argc, char ** argv)
 | 
			
		||||
  Uorg=Umu;
 | 
			
		||||
 | 
			
		||||
  SU3::RandomGaugeTransform(pRNG,Umu,g); // Unit gauge
 | 
			
		||||
  RealD plaq=WilsonLoops<PeriodicGimplF>::avgPlaquette(Umu);
 | 
			
		||||
  Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 | 
			
		||||
  std::cout << " Initial plaquette "<<plaq << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  RealD alpha=0.1;
 | 
			
		||||
  FourierAcceleratedGaugeFixer<PeriodicGimplF>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10);
 | 
			
		||||
  Real alpha=0.1;
 | 
			
		||||
  FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  plaq=WilsonLoops<PeriodicGimplF>::avgPlaquette(Umu);
 | 
			
		||||
  plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 | 
			
		||||
  std::cout << " Final plaquette "<<plaq << std::endl;
 | 
			
		||||
 | 
			
		||||
  Uorg = Uorg - Umu;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user