Merge branch 'develop' into release/v0.6.0

# Conflicts: # tests/core/Test_fft_gfix.cc
2025-08-02 12:47:07 +01:00 · 2016-11-08 16:53:42 +00:00
parent 50d277d8d9 65bcf281d0
commit 8a5e3a917c
5 changed files with 37 additions and 77 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -3,7 +3,7 @@ SUBDIRS = lib benchmarks tests

 .PHONY: tests

-tests:
+tests: all
 	$(MAKE) -C tests tests

 AM_CXXFLAGS += -I$(top_builddir)/include
--- a/44
+++ b/44
@@ -1,44 +0,0 @@
-This library provides data parallel C++ container classes with internal memory layout
-that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
-are provided, similar to HPF and cmfortran, and user control is given over the mapping of
-array indices to both MPI tasks and SIMD processing elements.
-
-* Identically shaped arrays then be processed with perfect data parallelisation.
-* Such identically shapped arrays are called conformable arrays.
-
-The transformation is based on the observation that Cartesian array processing involves
-identical processing to be performed on different regions of the Cartesian array.
-
-The library will (eventually) both geometrically decompose into MPI tasks and across SIMD lanes.
-
-Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but
-optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification
-for most programmers.
-
-The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
-Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
-
-These are presented as 
-
-  vRealF, vRealD, vComplexF, vComplexD 
-
-internal vector data types. These may be useful in themselves for other programmers.
-The corresponding scalar types are named
-
-  RealF, RealD, ComplexF, ComplexD
-
-MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is present in the library.
-
-   You can give `configure' initial values for configuration parameters
-by setting variables in the command line or in the environment.  Here
-is are examples:
-
-     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
-
-     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
-
-     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
-
-     ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
-     
-     
--- a/README.md
+++ b/README.md
@@ -126,7 +126,7 @@ If you want to build all the tests at once just use `make tests`.

 ### Possible communication interfaces

-The following options can be use with the `--enable-simd=` option to target different communication interfaces:
+The following options can be use with the `--enable-comms=` option to target different communication interfaces:

 | `<comm>`       | Description                                                   |
 | -------------- | ------------------------------------------------------------- |
--- a/lib/FFT.h
+++ b/lib/FFT.h
@@ -29,9 +29,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #ifndef _GRID_FFT_H_
 #define _GRID_FFT_H_

-#ifdef HAVE_FFTW	
+#ifdef HAVE_FFTW
+#ifdef USE_MKL
+#include <fftw/fftw3.h>
+#else
 #include <fftw3.h>
 #endif
+#endif


 namespace Grid {
--- a/tests/core/Test_fft_gfix.cc
+++ b/tests/core/Test_fft_gfix.cc
@@ -42,7 +42,7 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
  static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
    for(int mu=0;mu<Nd;mu++){
 //      ImplComplex cmi(0.0,-1.0);
-      ComplexD cmi(0.0,-1.0);
+      Complex cmi(0.0,-1.0);
      A[mu] = Ta(U[mu]) * cmi;
    }
  }
@@ -52,13 +52,13 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
      dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
    }
  }  
-  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,RealD & alpha,int maxiter,RealD Omega_tol, RealD Phi_tol) {
+  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol) {
    GridBase *grid = Umu._grid;

-    RealD org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
-    RealD org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
-    RealD old_trace = org_link_trace;
-    RealD trG;
+    Real org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
+    Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
+    Real old_trace = org_link_trace;
+    Real trG;

    std::vector<GaugeMat> U(Nd,grid);
                 GaugeMat dmuAmu(grid);
@@ -71,13 +71,13 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
      // Monitor progress and convergence test 
      // infrequently to minimise cost overhead
      if ( i %20 == 0 ) { 
-	RealD plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
-	RealD link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
+	Real plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
+	Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 

 	std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
 	
-	RealD Phi  = 1.0 - old_trace / link_trace ;
-	RealD Omega= 1.0 - trG;
+	Real Phi  = 1.0 - old_trace / link_trace ;
+	Real Omega= 1.0 - trG;


 	std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl;
@@ -91,7 +91,7 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
      }
    }
  };
-  static RealD SteepestDescentStep(std::vector<GaugeMat> &U,RealD & alpha, GaugeMat & dmuAmu) {
+  static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
    GridBase *grid = U[0]._grid;

    std::vector<GaugeMat> A(Nd,grid);
@@ -101,19 +101,19 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
    ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);


-    RealD vol = grid->gSites();
-    RealD trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
+    Real vol = grid->gSites();
+    Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;

    SU<Nc>::GaugeTransform(U,g);

    return trG;
  }

-  static RealD FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,RealD & alpha, GaugeMat & dmuAmu) {
+  static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {

    GridBase *grid = U[0]._grid;

-    RealD vol = grid->gSites();
+    Real vol = grid->gSites();

    FFT theFFT((GridCartesian *)grid);

@@ -139,13 +139,13 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
    std::vector<int> coor(grid->_ndimension,0);
    for(int mu=0;mu<Nd;mu++) {

-      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
+      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
      LatticeCoordinate(pmu,mu);
      pmu = TwoPiL * pmu ;
      psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5); 
    }

-    ComplexD psqMax(16.0);
+    Complex psqMax(16.0);
    Fp =  psqMax*one/psq;

    static int once;
@@ -160,20 +160,20 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
    theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);

    GaugeMat ciadmam(grid);
-    ComplexD cialpha(0.0,-alpha);
+    Complex cialpha(0.0,-alpha);
    ciadmam = dmuAmu*cialpha;
    SU<Nc>::taExp(ciadmam,g);

-    RealD trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
+    Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;

    SU<Nc>::GaugeTransform(U,g);

    return trG;
  }

-  static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,RealD & alpha, GaugeMat &dmuAmu) {
+  static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
    GridBase *grid = g._grid;
-    ComplexD cialpha(0.0,-alpha);
+    Complex cialpha(0.0,-alpha);
    GaugeMat ciadmam(grid);
    DmuAmu(A,dmuAmu);
    ciadmam = dmuAmu*cialpha;
@@ -193,11 +193,11 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
    ComplexField  pha(grid);
    GaugeMat      Apha(grid);

-    ComplexD ci(0.0,1.0);
+    Complex ci(0.0,1.0);

    for(int mu=0;mu<Nd;mu++){

-      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
+      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
      LatticeCoordinate(pmu,mu);
      pmu = TwoPiL * pmu ;
      pha = exp(pmu *  (0.5 *ci)); // e(ipmu/2) since Amu(x+mu/2)
@@ -213,14 +213,14 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {

    ComplexField  pmu(grid);
    ComplexField  pha(grid);
-    ComplexD ci(0.0,1.0);
+    Complex ci(0.0,1.0);
    
    // Sign convention for FFTW calls:
    // A(x)= Sum_p e^ipx A(p) / V
    // A(p)= Sum_p e^-ipx A(x)

    for(int mu=0;mu<Nd;mu++){
-      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
+      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
      LatticeCoordinate(pmu,mu);
      pmu = TwoPiL * pmu ;
      pha = exp(-pmu *  (0.5 *ci)); // e(+ipmu/2) since Amu(x+mu/2)
@@ -241,7 +241,7 @@ int main (int argc, char ** argv)
  int threads = GridThread::GetThreads();

  std::vector<int> latt_size   = GridDefaultLatt();
-  std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
+  std::vector<int> simd_layout( { vComplex::Nsimd(),1,1,1});
  std::vector<int> mpi_layout  = GridDefaultMpi();

  int vol = 1;
@@ -270,16 +270,16 @@ int main (int argc, char ** argv)
  Uorg=Umu;

  SU3::RandomGaugeTransform(pRNG,Umu,g); // Unit gauge
-  RealD plaq=WilsonLoops<PeriodicGimplF>::avgPlaquette(Umu);
+  Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
  std::cout << " Initial plaquette "<<plaq << std::endl;



-  RealD alpha=0.1;
-  FourierAcceleratedGaugeFixer<PeriodicGimplF>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10);
+  Real alpha=0.1;
+  FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10);


-  plaq=WilsonLoops<PeriodicGimplF>::avgPlaquette(Umu);
+  plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
  std::cout << " Final plaquette "<<plaq << std::endl;

  Uorg = Uorg - Umu;