From b812d5e39c57d14f3cc4c769ed1f72cf90691cb4 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 6 Dec 2016 16:31:13 +0000 Subject: [PATCH] Added single threaded version of the derivative for the Ls vectorised DWF --- lib/cartesian/Cartesian_base.h | 48 +++-- lib/lattice/Lattice_rng.h | 75 +++++--- lib/qcd/action/fermion/FermionOperatorImpl.h | 168 +++++++++++------- lib/qcd/action/fermion/WilsonFermion5D.cc | 31 ++-- .../EvenOddSchurDifferentiable.h | 22 ++- .../pseudofermion/TwoFlavourEvenOddRatio.h | 4 +- lib/qcd/hmc/integrators/Integrator.h | 1 + lib/simd/Grid_vector_types.h | 13 ++ lib/tensors/Tensor_class.h | 48 +++++ tests/hmc/Test_hmc_EODWFRatio_Binary.cc | 30 +++- .../hmc/Test_hmc_WilsonFermionGauge_Binary.cc | 2 + 11 files changed, 307 insertions(+), 135 deletions(-) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 72b21ee3..334914e9 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -99,7 +99,7 @@ public: virtual int oIndex(std::vector &coor) { int idx=0; - // Works with either global or local coordinates + // Works with either global or local coordinates for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); return idx; } @@ -146,15 +146,15 @@ public: // Distance should be either 0,1,2.. // if ( _simd_layout[dimension] > 2 ) { - for(int d=0;d<_ndimension;d++){ - if ( d != dimension ) assert ( (_simd_layout[d]==1) ); - } - permute_type = RotateBit; // How to specify distance; this is not just direction. - return permute_type; + for(int d=0;d<_ndimension;d++){ + if ( d != dimension ) assert ( (_simd_layout[d]==1) ); + } + permute_type = RotateBit; // How to specify distance; this is not just direction. + return permute_type; } for(int d=_ndimension-1;d>dimension;d--){ - if (_simd_layout[d]>1 ) permute_type++; + if (_simd_layout[d]>1 ) permute_type++; } return permute_type; } @@ -174,6 +174,22 @@ public: inline const std::vector &LocalDimensions(void) { return _ldimensions;}; inline const std::vector &VirtualLocalDimensions(void) { return _ldimensions;}; + //////////////////////////////////////////////////////////////// + // Print decomposition + //////////////////////////////////////////////////////////////// + + void show_decomposition(){ + std::cout << GridLogMessage << "Full Dimensions : " << _fdimensions << std::endl; + std::cout << GridLogMessage << "Global Dimensions : " << _gdimensions << std::endl; + std::cout << GridLogMessage << "Local Dimensions : " << _ldimensions << std::endl; + std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl; + std::cout << GridLogMessage << "iSites : " << _isites << std::endl; + std::cout << GridLogMessage << "oSites : " << _osites << std::endl; + std::cout << GridLogMessage << "lSites : " << lSites() << std::endl; + std::cout << GridLogMessage << "gSites : " << gSites() << std::endl; + std::cout << GridLogMessage << "Nd : " << _ndimension << std::endl; + } + //////////////////////////////////////////////////////////////// // Global addressing //////////////////////////////////////////////////////////////// @@ -187,8 +203,8 @@ public: gidx=0; int mult=1; for(int mu=0;mu<_ndimension;mu++) { - gidx+=mult*gcoor[mu]; - mult*=_gdimensions[mu]; + gidx+=mult*gcoor[mu]; + mult*=_gdimensions[mu]; } } void GlobalCoorToProcessorCoorLocalCoor(std::vector &pcoor,std::vector &lcoor,const std::vector &gcoor) @@ -196,9 +212,9 @@ public: pcoor.resize(_ndimension); lcoor.resize(_ndimension); for(int mu=0;mu<_ndimension;mu++){ - int _fld = _fdimensions[mu]/_processors[mu]; - pcoor[mu] = gcoor[mu]/_fld; - lcoor[mu] = gcoor[mu]%_fld; + int _fld = _fdimensions[mu]/_processors[mu]; + pcoor[mu] = gcoor[mu]/_fld; + lcoor[mu] = gcoor[mu]%_fld; } } void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector &gcoor) @@ -210,9 +226,9 @@ public: std::vector cblcoor(lcoor); for(int d=0;dCheckerBoarded(d) ) { - cblcoor[d] = lcoor[d]/2; - } + if( this->CheckerBoarded(d) ) { + cblcoor[d] = lcoor[d]/2; + } } i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim @@ -238,7 +254,7 @@ public: { RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); if(CheckerBoarded(0)){ - fcoor[0] = fcoor[0]*2+cb; + fcoor[0] = fcoor[0]*2+cb; } } void ProcessorCoorLocalCoorToGlobalCoor(std::vector &Pcoor,std::vector &Lcoor,std::vector &gcoor) diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index cd73b462..c5607ed8 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -6,8 +6,8 @@ Copyright (C) 2015 -Author: Peter Boyle -Author: paboyle + Author: Peter Boyle + Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -67,24 +67,42 @@ namespace Grid { return multiplicity; } + + + inline int RNGfillable_general(GridBase *coarse,GridBase *fine) + { + int rngdims = coarse->_ndimension; + + // trivially extended in higher dims, with locality guaranteeing RNG state is local to node + int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0); + // assumes that the higher dimensions are not using more processors + // all further divisions are local + for(int d=0;d_processors[d]==1); + for(int d=0;d_processors[d] == fine->_processors[d+lowerdims]); + + + // then divide the number of local sites + // check that the total number of sims agree, meanse the iSites are the same + assert(fine->Nsimd() == coarse->Nsimd()); + + // check that the two grids divide cleanly + assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() ); + + return fine->lSites() / coarse->lSites(); + } + // Wrap seed_seq to give common interface with random_device class fixedSeed { public: - typedef std::seed_seq::result_type result_type; - std::seed_seq src; fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; result_type operator () (void){ - std::vector list(1); - src.generate(list.begin(),list.end()); - return list[0]; - } }; @@ -252,24 +270,30 @@ namespace Grid { }; class GridParallelRNG : public GridRNGbase { + + double _time_counter; + public: GridBase *_grid; - int _vol; + unsigned int _vol; int generator_idx(int os,int is){ return is*_grid->oSites()+os; } GridParallelRNG(GridBase *grid) : GridRNGbase() { - _grid=grid; - _vol =_grid->iSites()*_grid->oSites(); + _grid = grid; + _vol =_grid->iSites()*_grid->oSites(); _generators.resize(_vol); _uniform.resize(_vol,std::uniform_real_distribution{0,1}); _gaussian.resize(_vol,std::normal_distribution(0.0,1.0) ); _bernoulli.resize(_vol,std::discrete_distribution{1,1}); - _seeded=0; + _seeded = 0; + + _time_counter = 0.0; + } @@ -325,37 +349,36 @@ namespace Grid { typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; - int multiplicity = RNGfillable(_grid, l._grid); + double inner_time_counter = usecond(); - int Nsimd = _grid->Nsimd(); - int osites = _grid->oSites(); + int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid + + int Nsimd = _grid->Nsimd();// guaranteed to be the same for l._grid too + int osites = _grid->oSites();// guaranteed to be <= l._grid->oSites() by a factor multiplicity int words = sizeof(scalar_object) / sizeof(scalar_type); PARALLEL_FOR_LOOP for (int ss = 0; ss < osites; ss++) { std::vector buf(Nsimd); - for (int m = 0; m < multiplicity; - m++) { // Draw from same generator multiplicity times + for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times - int sm = multiplicity * ss + - m; // Maps the generator site to the fine site + int sm = multiplicity * ss + m; // Maps the generator site to the fine site for (int si = 0; si < Nsimd; si++) { int gdx = generator_idx(ss, si); // index of generator state scalar_type *pointer = (scalar_type *)&buf[si]; dist[gdx].reset(); - for (int idx = 0; idx < words; idx++) { - + for (int idx = 0; idx < words; idx++) fillScalar(pointer[idx], dist[gdx], _generators[gdx]); - } } - // merge into SIMD lanes merge(l._odata[sm], buf); } } + + _time_counter += usecond()- inner_time_counter; }; void SeedRandomDevice(void) { @@ -366,6 +389,12 @@ namespace Grid { fixedSeed src(seeds); Seed(src); } + + void Report(){ + std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl; + } + + }; template inline void random(GridParallelRNG &rng,Lattice &l){ diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 0800dea6..be4f629f 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -44,14 +44,14 @@ namespace QCD { // Ultimately need Impl to always define types where XXX is opaque // // typedef typename XXX Simd; - // typedef typename XXX GaugeLinkField; + // typedef typename XXX GaugeLinkField; // typedef typename XXX GaugeField; // typedef typename XXX GaugeActField; // typedef typename XXX FermionField; // typedef typename XXX DoubledGaugeField; // typedef typename XXX SiteSpinor; - // typedef typename XXX SiteHalfSpinor; - // typedef typename XXX Compressor; + // typedef typename XXX SiteHalfSpinor; + // typedef typename XXX Compressor; // // and Methods: // void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) @@ -94,17 +94,17 @@ namespace QCD { //////////////////////////////////////////////////////////////////////// #define INHERIT_FIMPL_TYPES(Impl)\ - typedef typename Impl::FermionField FermionField; \ - typedef typename Impl::DoubledGaugeField DoubledGaugeField; \ - typedef typename Impl::SiteSpinor SiteSpinor; \ - typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ - typedef typename Impl::Compressor Compressor; \ - typedef typename Impl::StencilImpl StencilImpl; \ - typedef typename Impl::ImplParams ImplParams; \ + typedef typename Impl::FermionField FermionField; \ + typedef typename Impl::DoubledGaugeField DoubledGaugeField; \ + typedef typename Impl::SiteSpinor SiteSpinor; \ + typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ + typedef typename Impl::Compressor Compressor; \ + typedef typename Impl::StencilImpl StencilImpl; \ + typedef typename Impl::ImplParams ImplParams; \ typedef typename Impl::Coeff_t Coeff_t; #define INHERIT_IMPL_TYPES(Base) \ - INHERIT_GIMPL_TYPES(Base) \ + INHERIT_GIMPL_TYPES(Base) \ INHERIT_FIMPL_TYPES(Base) ///////////////////////////////////////////////////////////////////////////// @@ -148,11 +148,11 @@ namespace QCD { bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; inline void multLink(SiteHalfSpinor &phi, - const SiteDoubledGaugeField &U, - const SiteHalfSpinor &chi, - int mu, - StencilEntry *SE, - StencilImpl &St) { + const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, + int mu, + StencilEntry *SE, + StencilImpl &St) { mult(&phi(), &U(mu), &chi()); } @@ -162,16 +162,16 @@ namespace QCD { } inline void DoubleStore(GridBase *GaugeGrid, - DoubledGaugeField &Uds, - const GaugeField &Umu) { + DoubledGaugeField &Uds, + const GaugeField &Umu) { conformable(Uds._grid, GaugeGrid); conformable(Umu._grid, GaugeGrid); GaugeLinkField U(GaugeGrid); for (int mu = 0; mu < Nd; mu++) { - U = PeekIndex(Umu, mu); - PokeIndex(Uds, U, mu); - U = adj(Cshift(U, mu, -1)); - PokeIndex(Uds, U, mu + 4); + U = PeekIndex(Umu, mu); + PokeIndex(Uds, U, mu); + U = adj(Cshift(U, mu, -1)); + PokeIndex(Uds, U, mu + 4); } } @@ -189,11 +189,11 @@ namespace QCD { PARALLEL_FOR_LOOP for(int sss=0;sssoSites();sss++){ - int sU=sss; - for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here - } + int sU=sss; + for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here + } } PokeIndex(mat,tmp,mu); @@ -248,12 +248,12 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres } inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, - const SiteHalfSpinor &chi, int mu, StencilEntry *SE, - StencilImpl &St) { + const SiteHalfSpinor &chi, int mu, StencilEntry *SE, + StencilImpl &St) { SiteGaugeLink UU; for (int i = 0; i < Nrepresentation; i++) { for (int j = 0; j < Nrepresentation; j++) { - vsplat(UU()()(i, j), U(mu)()(i, j)); + vsplat(UU()()(i, j), U(mu)()(i, j)); } } mult(&phi(), &UU(), &chi()); @@ -290,10 +290,40 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres { assert(0); } - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField Ã, int mu) - { - assert(0); + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { + int LLs = Btilde._grid->_rdimensions[0]; + conformable(Atilde._grid,Btilde._grid); + GaugeLinkField tmp(mat._grid); + tmp = zero; + typedef decltype(traceIndex(outerProduct(Btilde[0], Atilde[0]))) result_type; + std::vector v_scalar_object(Btilde._grid->Nsimd()); + + PARALLEL_FOR_LOOP + for (int sss = 0; sss < tmp._grid->oSites(); sss++) { + std::vector ocoor; + tmp._grid->oCoorFromOindex(ocoor,sss); + for (int si = 0; si < tmp._grid->iSites(); si++){ + typename result_type::scalar_object scalar_object; + scalar_object = zero; + std::vector local_coor(tmp._grid->Nd()); + std::vector icoor; + tmp._grid->iCoorFromIindex(icoor,si); + for (int i = 0; i < tmp._grid->Nd(); i++) local_coor[i] = ocoor[i] + tmp._grid->_rdimensions[i]*icoor[i]; + + for (int s = 0; s < LLs; s++) { + std::vector slocal_coor(Btilde._grid->Nd()); + slocal_coor[0] = s; + for (int s4d = 1; s4d< Btilde._grid->Nd(); s4d++) slocal_coor[s4d] = local_coor[s4d-1]; + int sF = Btilde._grid->oIndexReduced(slocal_coor); + assert(sF < Btilde._grid->oSites()); + extract(traceIndex(outerProduct(Btilde[sF], Atilde[sF])), v_scalar_object); + for (int sv = 0; sv < v_scalar_object.size(); sv++) scalar_object += v_scalar_object[sv]; // sum across the 5d dimension + } + tmp._odata[sss].putlane(scalar_object, si); + } + } + PokeIndex(mat, tmp, mu); } }; @@ -339,19 +369,19 @@ class GparityWilsonImpl : public ConjugateGaugeImplNsimd(); - + int direction = St._directions[mu]; int distance = St._distances[mu]; int ptype = St._permute_type[mu]; @@ -359,13 +389,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl icoor; - + if ( SE->_around_the_world && Params.twists[mmu] ) { if ( sl == 2 ) { @@ -375,25 +405,25 @@ class GparityWilsonImpl : public ConjugateGaugeImpliCoorFromIindex(icoor,s); - - assert((icoor[direction]==0)||(icoor[direction]==1)); - - int permute_lane; - if ( distance == 1) { - permute_lane = icoor[direction]?1:0; - } else { - permute_lane = icoor[direction]?0:1; - } - - if ( permute_lane ) { - stmp(0) = vals[s](1); - stmp(1) = vals[s](0); - vals[s] = stmp; - } + grid->iCoorFromIindex(icoor,s); + + assert((icoor[direction]==0)||(icoor[direction]==1)); + + int permute_lane; + if ( distance == 1) { + permute_lane = icoor[direction]?1:0; + } else { + permute_lane = icoor[direction]?0:1; + } + + if ( permute_lane ) { + stmp(0) = vals[s](1); + stmp(1) = vals[s](0); + vals[s] = stmp; + } } merge(vtmp,vals); - + } else { vtmp(0) = chi(1); vtmp(1) = chi(0); @@ -418,11 +448,11 @@ class GparityWilsonImpl : public ConjugateGaugeImpl > coor(GaugeGrid); - + for(int mu=0;mu(Umu,mu); Uconj = conjugate(U); @@ -431,13 +461,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl_fdimensions[0]; - + GaugeLinkField tmp(mat._grid); tmp = zero; PARALLEL_FOR_LOOP diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index d2ac96e3..8b8d1e2d 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -271,11 +271,14 @@ void WilsonFermion5D::DhopDir(const FermionField &in, FermionField &out,in assert(dirdisp<=7); assert(dirdisp>=0); + int LLs = out._grid->_rdimensions[0]; + PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ - for(int s=0;soSites()); Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma); } } @@ -305,6 +308,8 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, DerivCommTime+=usecond(); Atilde=A; + int LLs = B._grid->_rdimensions[0]; + DerivComputeTime-=usecond(); for (int mu = 0; mu < Nd; mu++) { @@ -321,20 +326,18 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, DerivDhopComputeTime -= usecond(); PARALLEL_FOR_LOOP for (int sss = 0; sss < U._grid->oSites(); sss++) { - for (int s = 0; s < Ls; s++) { - int sU = sss; - int sF = s + Ls * sU; - + int sU = sss; + for (int s = 0; s < LLs; s++) { + int sF = s + LLs * sU; assert(sF < B._grid->oSites()); assert(sU < U._grid->oSites()); Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); - - //////////////////////////// - // spin trace outer product - //////////////////////////// } } + //////////////////////////// + // spin trace outer product + //////////////////////////// DerivDhopComputeTime += usecond(); Impl::InsertForce5D(mat, Btilde, Atilde, mu); } @@ -349,7 +352,7 @@ void WilsonFermion5D::DhopDeriv(GaugeField &mat, { conformable(A._grid,FermionGrid()); conformable(A._grid,B._grid); - conformable(GaugeGrid(),mat._grid); + //conformable(GaugeGrid(),mat._grid); mat.checkerboard = A.checkerboard; @@ -363,7 +366,7 @@ void WilsonFermion5D::DhopDerivEO(GaugeField &mat, int dag) { conformable(A._grid,FermionRedBlackGrid()); - conformable(GaugeRedBlackGrid(),mat._grid); + //conformable(GaugeRedBlackGrid(),mat._grid); conformable(A._grid,B._grid); assert(B.checkerboard==Odd); @@ -381,7 +384,7 @@ void WilsonFermion5D::DhopDerivOE(GaugeField &mat, int dag) { conformable(A._grid,FermionRedBlackGrid()); - conformable(GaugeRedBlackGrid(),mat._grid); + //conformable(GaugeRedBlackGrid(),mat._grid); conformable(A._grid,B._grid); assert(B.checkerboard==Even); diff --git a/lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h b/lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h index 6837bb19..6965fea9 100644 --- a/lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h +++ b/lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h @@ -68,8 +68,14 @@ namespace Grid{ assert(U.checkerboard==Odd); assert(V.checkerboard==U.checkerboard); - GaugeField ForceO(ucbgrid); - GaugeField ForceE(ucbgrid); + // NOTE Guido: WE DO NOT WANT TO USE THIS GRID FOR THE FORCE + // INHERIT FROM THE Force field + //GaugeField ForceO(ucbgrid); + //GaugeField ForceE(ucbgrid); + GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid); + GaugeField ForceO(forcecb); + GaugeField ForceE(forcecb); + // X^dag Der_oe MeeInv Meo Y // Use Mooee as nontrivial but gauge field indept @@ -110,8 +116,14 @@ namespace Grid{ assert(V.checkerboard==Odd); assert(V.checkerboard==V.checkerboard); - GaugeField ForceO(ucbgrid); - GaugeField ForceE(ucbgrid); + // NOTE Guido: WE DO NOT WANT TO USE THIS GRID FOR THE FORCE + // INHERIT FROM THE Force field + + //GaugeField ForceO(ucbgrid); + //GaugeField ForceE(ucbgrid); + GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid); + GaugeField ForceO(forcecb); + GaugeField ForceE(forcecb); // X^dag Der_oe MeeInv Meo Y // Use Mooee as nontrivial but gauge field indept @@ -130,6 +142,8 @@ namespace Grid{ setCheckerboard(Force,ForceE); setCheckerboard(Force,ForceO); Force=-Force; + + } }; diff --git a/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index a8bd9fe7..d7a26695 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -166,7 +166,9 @@ namespace Grid{ FermionField X(NumOp.FermionRedBlackGrid()); FermionField Y(NumOp.FermionRedBlackGrid()); - GaugeField force(NumOp.GaugeGrid()); + //GaugeField force(NumOp.GaugeGrid()); + GaugeField force(dSdU._grid); + conformable(force._grid, dSdU._grid); //Y=Vdag phi //X = (Mdag M)^-1 V^dag phi diff --git a/lib/qcd/hmc/integrators/Integrator.h b/lib/qcd/hmc/integrators/Integrator.h index 6f1e5c45..bc076085 100644 --- a/lib/qcd/hmc/integrators/Integrator.h +++ b/lib/qcd/hmc/integrators/Integrator.h @@ -114,6 +114,7 @@ class Integrator { // Fundamental updates, include smearing for (int a = 0; a < as[level].actions.size(); ++a) { Field force(U._grid); + conformable(U._grid, Mom._grid); Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 42f28b34..fc217cc5 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -386,6 +386,19 @@ class Grid_simd { } } + /////////////////////////////// + // Getting single lanes + /////////////////////////////// + inline Scalar_type getlane(int lane) { + return ((Scalar_type*)&v)[lane]; + } + + inline void putlane(const Scalar_type &S, int lane){ + ((Scalar_type*)&v)[lane] = S; + } + + + }; // end of Grid_simd class definition diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index 916fc09c..cdd57792 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -88,6 +88,21 @@ class iScalar { zeroit(*this); return *this; } + + + // managing the internal vector structure + strong_inline scalar_object getlane(int lane){ + scalar_object ret; + ret._internal = _internal.getlane(lane); + return ret; + } + + strong_inline void putlane(scalar_object &s, int lane){ + _internal.putlane(s._internal,lane); + } + + + friend strong_inline void vstream(iScalar &out, const iScalar &in) { vstream(out._internal, in._internal); @@ -226,6 +241,20 @@ class iVector { zeroit(*this); return *this; } + + strong_inline scalar_object getlane(int lane){ + scalar_object ret; + for (int i = 0; i < N; i++) ret._internal[i] = _internal[i].getlane(lane); + return ret; + } + + strong_inline void putlane(scalar_object &s, int lane){ + for (int i = 0; i < N; i++) _internal[i].putlane(s._internal[i],lane); + } + + + + friend strong_inline void zeroit(iVector &that) { for (int i = 0; i < N; i++) { zeroit(that._internal[i]); @@ -349,6 +378,25 @@ class iMatrix { return *this; } + + strong_inline scalar_object getlane(int lane){ + scalar_object ret; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + ret._internal[i][j] = _internal[i][j].getlane(lane); + } + } + return ret; + } + + strong_inline void putlane(scalar_object &s, int lane){ + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) _internal[i][j].putlane(s._internal[i][j],lane); + } + + + + friend strong_inline void zeroit(iMatrix &that){ for(int i=0;i FermionAction; typedef typename FermionAction::FermionField FermionField; - const int Ls = 12; + const int Ls = 8; UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + GridCartesian* sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi()); + GridRedBlackCartesian* sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); + + + FGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); + FrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); + + /* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + */ // temporarily need a gauge field LatticeGaugeField U(UGrid); // Gauge action double beta = 4.0; - IwasakiGaugeActionR Iaction(beta); + WilsonGaugeActionR Iaction(beta); Real mass = 0.04; Real pv = 1.0; RealD M5 = 1.5; RealD scale = 2.0; - FermionAction DenOp(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,scale); - FermionAction NumOp(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pv,M5,scale); + FermionAction DenOp(U,*FGrid,*FrbGrid,*sUGrid,*sUrbGrid,mass,M5,scale); + FermionAction NumOp(U,*FGrid,*FrbGrid,*sUGrid,*sUrbGrid,pv,M5,scale); + + std::cout << GridLogMessage << "Frb Osites: " << FrbGrid->oSites() << std::endl; + std::cout << GridLogMessage << "sUGrid Osites: " << sUGrid->oSites() << std::endl; double StoppingCondition = 1.0e-8; double MaxCGIterations = 10000; @@ -94,7 +108,7 @@ public: TwoFlavourEvenOddRatioPseudoFermionAction Nf2(NumOp, DenOp,CG,CG); // Set smearing (true/false), default: false - Nf2.is_smeared = true; + Nf2.is_smeared = false; // Collect actions // here an example of 2 level integration @@ -154,7 +168,7 @@ int main(int argc, char **argv) { // Seeds for the random number generators std::vector SerSeed({1, 2, 3, 4, 5}); - std::vector ParSeed({6, 7, 8, 9, 10}); + std::vector ParSeed({6, 7, 8, 9, 5}); TheHMC.RNGSeeds(SerSeed, ParSeed); TheHMC.MDparameters.set(20, 1.0);// MDsteps, traj length diff --git a/tests/hmc/Test_hmc_WilsonFermionGauge_Binary.cc b/tests/hmc/Test_hmc_WilsonFermionGauge_Binary.cc index 42a903d6..cae53960 100644 --- a/tests/hmc/Test_hmc_WilsonFermionGauge_Binary.cc +++ b/tests/hmc/Test_hmc_WilsonFermionGauge_Binary.cc @@ -156,4 +156,6 @@ int main(int argc, char **argv) { TheHMC.MDparameters.set(10, 1.0);// MDsteps, traj length TheHMC.BuildTheAction(argc, argv); + + Grid_finalize(); }