From ce1a115e0b8ff288c3615f8e150ef100053354bc Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 20 Dec 2016 17:51:30 +0000 Subject: [PATCH] Removing redundant arguments for integrator functions, step 1 --- lib/lattice/Lattice_base.h | 19 +++- lib/qcd/hmc/GenericHMCrunner.h | 39 ++++---- lib/qcd/hmc/integrators/Integrator.h | 3 +- lib/simd/Grid_avx.h | 115 +++++++++++------------ lib/supported_compilers.h | 2 +- tests/hmc/Test_hmc_WilsonGauge_Binary.cc | 60 +++++++++++- tests/solver/Test_dwf_cg_prec_LsVec.cc | 6 +- 7 files changed, 152 insertions(+), 92 deletions(-) diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index e4dc1ca8..e6536830 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -255,19 +255,28 @@ PARALLEL_FOR_LOOP } Lattice(const Lattice& r){ // copy constructor - _grid = r._grid; - checkerboard = r.checkerboard; + _grid = r._grid; + checkerboard = r.checkerboard; _odata.resize(_grid->oSites());// essential - PARALLEL_FOR_LOOP + PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ - _odata[ss]=r._odata[ss]; + _odata[ss]=r._odata[ss]; } - } + } virtual ~Lattice(void) = default; + void reset(GridBase* grid) { + if (_grid != grid) { + _grid = grid; + _odata.resize(grid->oSites()); + checkerboard = 0; + } + } + + template strong_inline Lattice & operator = (const sobj & r){ PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ diff --git a/lib/qcd/hmc/GenericHMCrunner.h b/lib/qcd/hmc/GenericHMCrunner.h index 114595b1..2530559f 100644 --- a/lib/qcd/hmc/GenericHMCrunner.h +++ b/lib/qcd/hmc/GenericHMCrunner.h @@ -27,17 +27,17 @@ with this program; if not, write to the Free Software Foundation, Inc., directory *************************************************************************************/ /* END LEGAL */ -#ifndef GENERIC_HMC_RUNNER -#define GENERIC_HMC_RUNNER +#ifndef GRID_GENERIC_HMC_RUNNER +#define GRID_GENERIC_HMC_RUNNER namespace Grid { namespace QCD { - // Virtual Class for HMC specific for gauge theories - // implement a specific theory by defining the BuildTheAction - template - class BinaryHmcRunnerTemplate { - public: +// Virtual Class for HMC specific for gauge theories +// implement a specific theory by defining the BuildTheAction +template +class BinaryHmcRunnerTemplate { +public: INHERIT_FIELD_TYPES(Implementation); typedef Implementation ImplPolicy; @@ -56,8 +56,10 @@ namespace QCD { IntegratorParameters MDparameters; GridCartesian * UGrid; - GridCartesian * FGrid; GridRedBlackCartesian *UrbGrid; + + // These two are unnecessary, eliminate + GridCartesian * FGrid; GridRedBlackCartesian *FrbGrid; std::vector SerialSeed; @@ -68,11 +70,11 @@ namespace QCD { ParallelSeed = P; } - virtual void BuildTheAction(int argc, char **argv) = 0; // necessary? + virtual void BuildTheAction(int argc, char **argv) = 0; // necessary? // A couple of wrapper classes template - void Run(int argc, char **argv, IOCheckpointer &Checkpoint) { + void Run(int argc, char **argv, IOCheckpointer &Checkpoint) { NoSmearing S; Runner(argc, argv, Checkpoint, S); } @@ -83,6 +85,8 @@ namespace QCD { } ////////////////////////////// + + template void Runner(int argc, @@ -141,11 +145,7 @@ namespace QCD { Field U(UGrid); - typedef MinimumNorm2 - IntegratorType; // change here to change the algorithm - + typedef MinimumNorm2 IntegratorType; // change here to change the algorithm IntegratorType MDynamics(UGrid, MDparameters, TheAction, Smearing); HMCparameters HMCpar; @@ -187,7 +187,7 @@ namespace QCD { // Run it HMC.evolve(); } - }; +}; // These are for gauge fields typedef BinaryHmcRunnerTemplate BinaryHmcRunner; @@ -199,6 +199,7 @@ namespace QCD { typedef BinaryHmcRunnerTemplate ScalarBinaryHmcRunner; -} -} -#endif + +} // namespace QCD +} // namespace Grid +#endif diff --git a/lib/qcd/hmc/integrators/Integrator.h b/lib/qcd/hmc/integrators/Integrator.h index db3712c0..48ec746c 100644 --- a/lib/qcd/hmc/integrators/Integrator.h +++ b/lib/qcd/hmc/integrators/Integrator.h @@ -189,7 +189,8 @@ class Integrator { // Initialization of momenta and actions void refresh(Field& U, GridParallelRNG& pRNG) { - assert(P._grid == U._grid); + //assert(P._grid == U._grid); + P.reset(U._grid); std::cout << GridLogIntegrator << "Integrator refresh\n"; FieldImplementation::generate_momenta(P, pRNG); diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index 77927b10..ac6ec9f4 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -1,6 +1,6 @@ /************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/simd/Grid_avx.h @@ -29,15 +29,6 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -//---------------------------------------------------------------------- -/*! @file Grid_avx.h - @brief Optimization libraries for AVX1/2 instructions set - - Using intrinsics -*/ -// Time-stamp: <2015-06-16 23:30:41 neo> -//---------------------------------------------------------------------- - #include #ifdef AVXFMA4 #include @@ -66,9 +57,9 @@ namespace Optimization { double f[4]; }; - struct Vsplat{ - //Complex float - inline __m256 operator()(float a, float b){ + struct Vsplat{ + // Complex float + inline __m256 operator()(float a, float b) { return _mm256_set_ps(b,a,b,a,b,a,b,a); } // Real float @@ -90,7 +81,7 @@ namespace Optimization { }; struct Vstore{ - //Float + //Float inline void operator()(__m256 a, float* F){ _mm256_store_ps(F,a); } @@ -119,15 +110,15 @@ namespace Optimization { }; struct Vset{ - // Complex float + // Complex float inline __m256 operator()(Grid::ComplexF *a){ return _mm256_set_ps(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real()); } - // Complex double + // Complex double inline __m256d operator()(Grid::ComplexD *a){ return _mm256_set_pd(a[1].imag(),a[1].real(),a[0].imag(),a[0].real()); } - // Real float + // Real float inline __m256 operator()(float *a){ return _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); } @@ -144,8 +135,8 @@ namespace Optimization { template struct Reduce{ - //Need templated class to overload output type - //General form must generate error if compiled + // Need templated class to overload output type + // General form must generate error if compiled inline Out_type operator()(In_type in){ printf("Error, using wrong Reduce function\n"); exit(1); @@ -224,7 +215,7 @@ namespace Optimization { ymm1 = _mm256_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi ymm2 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai ymm1 = _mm256_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi - return _mm256_addsub_ps(ymm0,ymm1); + return _mm256_addsub_ps(ymm0,ymm1); #endif #if defined (AVXFMA4) __m256 a_real = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ar ar, @@ -241,10 +232,10 @@ namespace Optimization { #endif } // Complex double - inline __m256d operator()(__m256d a, __m256d b){ - //Multiplication of (ak+ibk)*(ck+idk) + inline __m256d operator()(__m256d a, __m256d b) { + // Multiplication of (ak+ibk)*(ck+idk) // a + i b can be stored as a data structure - //From intel optimisation reference guide + // From intel optimisation reference guide /* movsldup xmm0, Src1; load real parts into the destination, ; a1, a1, a0, a0 @@ -268,7 +259,7 @@ namespace Optimization { __m256d ymm0,ymm1,ymm2; ymm0 = _mm256_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00 ymm0 = _mm256_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br - ymm1 = _mm256_shuffle_pd(b,b,0x5); // ymm1 <- br,bi b'01,01 + ymm1 = _mm256_shuffle_pd(b,b,0x5); // ymm1 <- br,bi b'01,01 ymm2 = _mm256_shuffle_pd(a,a,0xF); // ymm2 <- ai,ai b'11,11 ymm1 = _mm256_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi return _mm256_addsub_pd(ymm0,ymm1); @@ -365,10 +356,10 @@ namespace Optimization { } }; - struct Div{ + struct Div { // Real float - inline __m256 operator()(__m256 a, __m256 b){ - return _mm256_div_ps(a,b); + inline __m256 operator()(__m256 a, __m256 b) { + return _mm256_div_ps(a, b); } // Real double inline __m256d operator()(__m256d a, __m256d b){ @@ -454,7 +445,7 @@ namespace Optimization { #define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16) #endif -#if defined (AVX1) || defined (AVXFMA) +#if defined (AVX1) || defined (AVXFMA) #define _mm256_alignr_epi32_grid(ret,a,b,n) { \ __m128 aa, bb; \ \ @@ -487,7 +478,7 @@ namespace Optimization { struct Rotate{ - static inline __m256 rotate(__m256 in,int n){ + static inline __m256 rotate(__m256 in,int n){ switch(n){ case 0: return tRotate<0>(in);break; case 1: return tRotate<1>(in);break; @@ -500,7 +491,7 @@ namespace Optimization { default: assert(0); } } - static inline __m256d rotate(__m256d in,int n){ + static inline __m256d rotate(__m256d in,int n){ switch(n){ case 0: return tRotate<0>(in);break; case 1: return tRotate<1>(in);break; @@ -509,28 +500,28 @@ namespace Optimization { default: assert(0); } } - - - template - static inline __m256 tRotate(__m256 in){ - __m256 tmp = Permute::Permute0(in); - __m256 ret = in; - if ( n > 3 ) { - _mm256_alignr_epi32_grid(ret,in,tmp,n); - } else { - _mm256_alignr_epi32_grid(ret,tmp,in,n); - } - return ret; - }; + template - static inline __m256d tRotate(__m256d in){ - __m256d tmp = Permute::Permute0(in); - __m256d ret = in; - if ( n > 1 ) { - _mm256_alignr_epi64_grid(ret,in,tmp,n); + static inline __m256 tRotate(__m256 in){ + __m256 tmp = Permute::Permute0(in); + __m256 ret; + if ( n > 3 ) { + _mm256_alignr_epi32_grid(ret,in,tmp,n); } else { - _mm256_alignr_epi64_grid(ret,tmp,in,n); + _mm256_alignr_epi32_grid(ret,tmp,in,n); + } + return ret; + } + + template + static inline __m256d tRotate(__m256d in){ + __m256d tmp = Permute::Permute0(in); + __m256d ret; + if ( n > 1 ) { + _mm256_alignr_epi64_grid(ret,in,tmp,n); + } else { + _mm256_alignr_epi64_grid(ret,tmp,in,n); } return ret; }; @@ -543,7 +534,7 @@ namespace Optimization { __m256 v1,v2; v1=Optimization::Permute::Permute0(in); // avx 256; quad complex single v1= _mm256_add_ps(v1,in); - v2=Optimization::Permute::Permute1(v1); + v2=Optimization::Permute::Permute1(v1); v1 = _mm256_add_ps(v1,v2); u256f conv; conv.v = v1; return Grid::ComplexF(conv.f[0],conv.f[1]); @@ -555,15 +546,15 @@ namespace Optimization { __m256 v1,v2; v1 = Optimization::Permute::Permute0(in); // avx 256; octo-double v1 = _mm256_add_ps(v1,in); - v2 = Optimization::Permute::Permute1(v1); + v2 = Optimization::Permute::Permute1(v1); v1 = _mm256_add_ps(v1,v2); - v2 = Optimization::Permute::Permute2(v1); + v2 = Optimization::Permute::Permute2(v1); v1 = _mm256_add_ps(v1,v2); u256f conv; conv.v=v1; return conv.f[0]; } - - + + //Complex double Reduce template<> inline Grid::ComplexD Reduce::operator()(__m256d in){ @@ -573,14 +564,14 @@ namespace Optimization { u256d conv; conv.v = v1; return Grid::ComplexD(conv.f[0],conv.f[1]); } - + //Real double Reduce template<> inline Grid::RealD Reduce::operator()(__m256d in){ __m256d v1,v2; v1 = Optimization::Permute::Permute0(in); // avx 256; quad double v1 = _mm256_add_pd(v1,in); - v2 = Optimization::Permute::Permute1(v1); + v2 = Optimization::Permute::Permute1(v1); v1 = _mm256_add_pd(v1,v2); u256d conv; conv.v = v1; return conv.f[0]; @@ -593,17 +584,17 @@ namespace Optimization { printf("Reduce : Missing integer implementation -> FIX\n"); assert(0); } - + } ////////////////////////////////////////////////////////////////////////////////////// -// Here assign types +// Here assign types typedef __m256 SIMD_Ftype; // Single precision type typedef __m256d SIMD_Dtype; // Double precision type typedef __m256i SIMD_Itype; // Integer type - // prefecthing + // prefecthing inline void v_prefetch0(int size, const char *ptr){ for(int i=0;i using ReduceSIMD = Optimization::Reduce; + template using ReduceSIMD = Optimization::Reduce; // Arithmetic operations typedef Optimization::Sum SumSIMD; @@ -632,4 +623,4 @@ namespace Optimization { typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; -} +} // namespace Grid diff --git a/lib/supported_compilers.h b/lib/supported_compilers.h index cec68713..b4cd9a78 100644 --- a/lib/supported_compilers.h +++ b/lib/supported_compilers.h @@ -46,4 +46,4 @@ #endif -#endif +#endif // COMPILER_CHECK_H diff --git a/tests/hmc/Test_hmc_WilsonGauge_Binary.cc b/tests/hmc/Test_hmc_WilsonGauge_Binary.cc index 4c650357..d5228ce8 100644 --- a/tests/hmc/Test_hmc_WilsonGauge_Binary.cc +++ b/tests/hmc/Test_hmc_WilsonGauge_Binary.cc @@ -63,6 +63,11 @@ class HMCRunnerParameters : Serializable { class HmcRunner : public BinaryHmcRunner { public: HMCRunnerParameters HMCPar; + void BuildTheAction(int argc, char **argv){} +}; +/* + + // eliminate arcg and argv from here void BuildTheAction(int argc, char **argv) { @@ -90,6 +95,7 @@ class HmcRunner : public BinaryHmcRunner { // Add observables // options for checkpointers + // this can be moved outside the BuildTheAction //BinaryHmcCheckpointer //ILDGHmcCheckpointer //NerscHmcCheckpointer @@ -107,9 +113,11 @@ class HmcRunner : public BinaryHmcRunner { ObservablesList.push_back(&PlaqLog); ObservablesList.push_back(&Checkpoint); + // This must run from here so that the grids are defined Run(argc, argv, Checkpoint); // no smearing }; }; +*/ } } @@ -136,7 +144,57 @@ int main(int argc, char **argv) { TheHMC.MDparameters.set(TheHMC.HMCPar.MDsteps, TheHMC.HMCPar.TrajectorLength); - TheHMC.BuildTheAction(argc, argv); + //TheHMC.BuildTheAction(argc, argv); + + + + // Typedefs to simplify notation + typedef WilsonGaugeActionR GaugeAction; + typedef WilsonImplR ImplPolicy; + typedef WilsonFermionR FermionAction; + typedef typename FermionAction::FermionField FermionField; + + // this can be simplified too. MakeDefaultGrid(Nd) + TheHMC.UGrid = SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), + GridDefaultSimd(Nd, vComplex::Nsimd()), + GridDefaultMpi()); + + + // Gauge action + std::cout << GridLogMessage << "Beta: " << TheHMC.HMCPar.beta << std::endl; + GaugeAction Waction(TheHMC.HMCPar.beta); + + // Collect actions + ActionLevel Level1(1); + Level1.push_back(&Waction); + TheHMC.TheAction.push_back(Level1); + + // Add observables + // options for checkpointers + // this can be moved outside the BuildTheAction + //BinaryHmcCheckpointer + //ILDGHmcCheckpointer + //NerscHmcCheckpointer + NerscHmcCheckpointer Checkpoint( + TheHMC.HMCPar.conf_prefix, TheHMC.HMCPar.rng_prefix, TheHMC.HMCPar.SaveInterval, TheHMC.HMCPar.format); + // Can implement also a specific function in the hmcrunner + // AddCheckpoint (...) that takes the same parameters + a string/tag + // defining the type of the checkpointer + // with tags can be implemented by overloading and no ifs + // Then force all checkpoint to have few common functions + // return an object that is then passed to the Run function + + PlaquetteLogger PlaqLog( + std::string("Plaquette")); + TheHMC.ObservablesList.push_back(&PlaqLog); + TheHMC.ObservablesList.push_back(&Checkpoint); + + // This must run from here so that the grids are defined + TheHMC.Run(argc, argv, Checkpoint); // no smearing + + + Grid_finalize(); } diff --git a/tests/solver/Test_dwf_cg_prec_LsVec.cc b/tests/solver/Test_dwf_cg_prec_LsVec.cc index 3fbec4d3..dfa0a194 100644 --- a/tests/solver/Test_dwf_cg_prec_LsVec.cc +++ b/tests/solver/Test_dwf_cg_prec_LsVec.cc @@ -67,7 +67,7 @@ int main(int argc, char** argv) { GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - std::cout << GridLogMessage << "Generating random ferrmion field" << std::endl; + std::cout << GridLogMessage << "Generating random fermion field" << std::endl; LatticeFermion src(FGrid); random(RNG5, src); LatticeFermion result(FGrid); @@ -96,7 +96,7 @@ int main(int argc, char** argv) { GridStopWatch CGTimer; SchurDiagMooeeOperator HermOpEO(Ddwf); - ConjugateGradient CG(1.0e-8, 10000, 0);// switch off the assert + ConjugateGradient CG(1.0e-8, 10000, 0); // switch off the assert Ddwf.ZeroCounters(); CGTimer.Start(); @@ -110,4 +110,4 @@ int main(int argc, char** argv) { Ddwf.Report(); Grid_finalize(); -} +}