From b5510427f996a5cf0450d2f452e0788074191144 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 18 Apr 2018 01:43:29 +0100 Subject: [PATCH 1/6] physical fermion interface, cshift benchmark in SU3. --- benchmarks/Benchmark_su3.cc | 44 ++++++++++++++++++++--- lib/qcd/action/fermion/CayleyFermion5D.cc | 29 +++++++++++++++ lib/qcd/action/fermion/CayleyFermion5D.h | 5 +++ lib/qcd/action/fermion/FermionOperator.h | 13 +++++++ 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 035af2d9..628ad5bd 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -35,7 +35,8 @@ using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); -#define LMAX (64) +#define LMAX (40) +#define LINC (4) int64_t Nloop=20; @@ -51,7 +52,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -83,7 +84,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -114,7 +115,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -145,7 +146,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -170,5 +171,38 @@ int main (int argc, char ** argv) } + + std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); + + for(int mu=0;mu<=4;mu++){ + double start=usecond(); + for(int64_t i=0;i +void CayleyFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) +{ + int Ls = this->Ls; + FermionField tmp(this->FermionGrid()); + tmp = solution5d; + conformable(solution5d._grid,this->FermionGrid()); + conformable(exported4d._grid,this->GaugeGrid()); + axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0); + axpby_ssp_pplus (tmp, 1., tmp , 1., solution5d, 0, Ls-1); + ExtractSlice(exported4d, tmp, 0, 0); +} +template +void CayleyFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) +{ + int Ls = this->Ls; + FermionField tmp(this->FermionGrid()); + conformable(imported5d._grid,this->FermionGrid()); + conformable(input4d._grid ,this->GaugeGrid()); + tmp = zero; + InsertSlice(input4d, tmp, 0 , 0); + InsertSlice(input4d, tmp, Ls-1, 0); + axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0); + axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); + Dminus(tmp,imported5d); +} template void CayleyFermion5D::Dminus(const FermionField &psi, FermionField &chi) { diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index ef75235a..b370b09d 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -83,8 +83,13 @@ namespace Grid { virtual void M5D (const FermionField &psi, FermionField &chi); virtual void M5Ddag(const FermionField &psi, FermionField &chi); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// virtual void Dminus(const FermionField &psi, FermionField &chi); virtual void DminusDag(const FermionField &psi, FermionField &chi); + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); ///////////////////////////////////////////////////// // Instantiate different versions depending on Impl diff --git a/lib/qcd/action/fermion/FermionOperator.h b/lib/qcd/action/fermion/FermionOperator.h index 1d395d53..5be36f13 100644 --- a/lib/qcd/action/fermion/FermionOperator.h +++ b/lib/qcd/action/fermion/FermionOperator.h @@ -128,6 +128,19 @@ namespace Grid { std::vector mom, unsigned int tmin, unsigned int tmax)=0; + /////////////////////////////////////////////// + // Physical field import/export + /////////////////////////////////////////////// + virtual void Dminus(const FermionField &psi, FermionField &chi) { chi=psi; } + virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; } + virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported) + { + imported = input; + }; + virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported) + { + exported=solution; + }; }; } From 870b1a85aebc8bf545a162053fb04823fd937fa2 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 18 Apr 2018 14:17:49 +0100 Subject: [PATCH 2/6] Think I have the physical prop interface to CF and PF overlap right, but need a strong check/regression. Only support Hw overlap, not Ht for now. Ht needs a new Dminus implemented. --- .../fermion/ContinuedFractionFermion5D.cc | 21 +++++++++++++++++++ .../fermion/ContinuedFractionFermion5D.h | 8 +++++++ .../fermion/PartialFractionFermion5D.cc | 21 +++++++++++++++++++ .../action/fermion/PartialFractionFermion5D.h | 6 ++++++ 4 files changed, 56 insertions(+) diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc index 5d39ef9b..f6857115 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc @@ -295,6 +295,27 @@ namespace Grid { assert((Ls&0x1)==1); // Odd Ls required } + template + void ContinuedFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) + { + int Ls = this->Ls; + conformable(solution5d._grid,this->FermionGrid()); + conformable(exported4d._grid,this->GaugeGrid()); + ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); + } + template + void ContinuedFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) + { + int Ls = this->Ls; + conformable(imported5d._grid,this->FermionGrid()); + conformable(input4d._grid ,this->GaugeGrid()); + FermionField tmp(this->FermionGrid()); + tmp=zero; + InsertSlice(input4d, tmp, Ls-1, Ls-1); + tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; + this->Dminus(tmp,imported5d); + } + FermOpTemplateInstantiate(ContinuedFractionFermion5D); } diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h index e1e50aa5..b551fc28 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -65,6 +65,14 @@ namespace Grid { // Efficient support for multigrid coarsening virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// + // virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case + // virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); + // Constructors ContinuedFractionFermion5D(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc index 3a78e043..11840027 100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.cc +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc @@ -396,6 +396,27 @@ namespace Grid { amax=zolo_hi; } + template + void PartialFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) + { + int Ls = this->Ls; + conformable(solution5d._grid,this->FermionGrid()); + conformable(exported4d._grid,this->GaugeGrid()); + ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); + } + template + void PartialFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) + { + int Ls = this->Ls; + conformable(imported5d._grid,this->FermionGrid()); + conformable(input4d._grid ,this->GaugeGrid()); + FermionField tmp(this->FermionGrid()); + tmp=zero; + InsertSlice(input4d, tmp, Ls-1, Ls-1); + tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; + this->Dminus(tmp,imported5d); + } + // Constructors template PartialFractionFermion5D::PartialFractionFermion5D(GaugeField &_Umu, diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h b/lib/qcd/action/fermion/PartialFractionFermion5D.h index 0ec72de4..91f1bd3c 100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.h +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h @@ -70,6 +70,12 @@ namespace Grid { // Efficient support for multigrid coarsening virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); + // Constructors PartialFractionFermion5D(GaugeField &_Umu, GridCartesian &FiveDimGrid, From c11a3ca0a74096ad2eea03487105fbc7924b625a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Apr 2018 17:13:04 +0100 Subject: [PATCH 3/6] vectorise/unvectorise in reverse order --- lib/lattice/Lattice_transfer.h | 93 ++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 44f0337d..f988f310 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -599,6 +599,51 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) extract1(in_vobj, out_ptrs, 0); } } + +template +typename std::enable_if::value && !isSIMDvectorized::value, void>::type +unvectorizeToRevLexOrdArray(std::vector &out, const Lattice &in) +{ + + typedef typename vobj::vector_type vtype; + + GridBase* in_grid = in._grid; + out.resize(in_grid->lSites()); + + int ndim = in_grid->Nd(); + int in_nsimd = vtype::Nsimd(); + + std::vector > in_icoor(in_nsimd); + + for(int lane=0; lane < in_nsimd; lane++){ + in_icoor[lane].resize(ndim); + in_grid->iCoorFromIindex(in_icoor[lane], lane); + } + + parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector out_ptrs(in_nsimd); + + std::vector in_ocoor(ndim); + in_grid->oCoorFromOindex(in_ocoor, in_oidx); + + std::vector lcoor(in_grid->Nd()); + + for(int lane=0; lane < in_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*in_icoor[lane][mu]; + + int lex; + Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions); + out_ptrs[lane] = &out[lex]; + } + + //Unpack into those ptrs + const vobj & in_vobj = in._odata[in_oidx]; + extract1(in_vobj, out_ptrs, 0); + } +} + //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order template typename std::enable_if::value @@ -648,6 +693,54 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) } } +template +typename std::enable_if::value + && !isSIMDvectorized::value, void>::type +vectorizeFromRevLexOrdArray( std::vector &in, Lattice &out) +{ + + typedef typename vobj::vector_type vtype; + + GridBase* grid = out._grid; + assert(in.size()==grid->lSites()); + + int ndim = grid->Nd(); + int nsimd = vtype::Nsimd(); + + std::vector > icoor(nsimd); + + for(int lane=0; lane < nsimd; lane++){ + icoor[lane].resize(ndim); + grid->iCoorFromIindex(icoor[lane],lane); + } + + parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector ptrs(nsimd); + + std::vector ocoor(ndim); + grid->oCoorFromOindex(ocoor, oidx); + + std::vector lcoor(grid->Nd()); + + for(int lane=0; lane < nsimd; lane++){ + + for(int mu=0;mu_rdimensions[mu]*icoor[lane][mu]; + } + + int lex; + Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions); + ptrs[lane] = &in[lex]; + } + + //pack from those ptrs + vobj vecobj; + merge1(vecobj, ptrs, 0); + out._odata[oidx] = vecobj; + } +} + //Convert a Lattice from one precision to another template void precisionChange(Lattice &out, const Lattice &in){ From 94edf9cf8be8eb2fe8990db39c3c07c4a38bd71b Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Apr 2018 17:13:21 +0100 Subject: [PATCH 4/6] HDF5: direct access to group for custom operations --- lib/serialisation/Hdf5IO.cc | 10 ++++++++++ lib/serialisation/Hdf5IO.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index 1fb7be0c..b915a988 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -55,6 +55,11 @@ void Hdf5Writer::writeDefault(const std::string &s, const char *x) writeDefault(s, sx); } +Group & Hdf5Writer::getGroup(void) +{ + return group_; +} + // Reader implementation /////////////////////////////////////////////////////// Hdf5Reader::Hdf5Reader(const std::string &fileName) : fileName_(fileName) @@ -103,3 +108,8 @@ void Hdf5Reader::readDefault(const std::string &s, std::string &x) x.resize(strType.getSize()); attribute.read(strType, &(x[0])); } + +Group & Hdf5Reader::getGroup(void) +{ + return group_; +} diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 12625ab8..1ae2791e 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -38,6 +38,7 @@ namespace Grid template typename std::enable_if>::is_number, void>::type writeDefault(const std::string &s, const std::vector &x); + H5NS::Group & getGroup(void); private: template void writeSingleAttribute(const U &x, const std::string &name, @@ -65,6 +66,7 @@ namespace Grid template typename std::enable_if>::is_number, void>::type readDefault(const std::string &s, std::vector &x); + H5NS::Group & getGroup(void); private: template void readSingleAttribute(U &x, const std::string &name, From 141da3ae71b255d0c175016a2e2dc03ee241487a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Apr 2018 17:13:34 +0100 Subject: [PATCH 5/6] function to get tensor dimensions --- lib/serialisation/VectorUtils.h | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/lib/serialisation/VectorUtils.h b/lib/serialisation/VectorUtils.h index f5c76b84..53088998 100644 --- a/lib/serialisation/VectorUtils.h +++ b/lib/serialisation/VectorUtils.h @@ -30,6 +30,48 @@ namespace Grid { typedef typename std::vector::type>> type; }; + template + void tensorDim(std::vector &dim, const T &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + } + + template + void tensorDim(std::vector &dim, const iScalar &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + tensorDim(dim, t._internal, false); + } + + template + void tensorDim(std::vector &dim, const iVector &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + dim.push_back(N); + tensorDim(dim, t._internal[0], false); + } + + template + void tensorDim(std::vector &dim, const iMatrix &t, const bool wipe = true) + { + if (wipe) + { + dim.clear(); + } + dim.push_back(N); + dim.push_back(N); + tensorDim(dim, t._internal[0][0], false); + } + template typename TensorToVec::type tensorToVec(const T &t) { From a1be53332956d4eb074632ef10a1449a0aac582e Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 24 Apr 2018 01:19:53 -0700 Subject: [PATCH 6/6] Corrected Flop count in Benchmark su3 and expanded the Wilson flow output --- benchmarks/Benchmark_su3.cc | 4 ++-- lib/qcd/smearing/WilsonFlow.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 628ad5bd..5f088fdc 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -166,7 +166,7 @@ int main (int argc, char ** argv) double time = (stop-start)/Nloop*1000.0; double bytes=3*vol*Nc*Nc*sizeof(Complex); - double flops=Nc*Nc*(8+8+8)*vol; + double flops=Nc*Nc*(6+8+8)*vol; std::cout<::smear(GaugeField& out, const GaugeField& in) const { std::cout << "Time to evolve " << diff.count() << " s\n"; #endif std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " - << energyDensityPlaquette(step,out) << std::endl; + << step << " " << tau(step) << " " + << energyDensityPlaquette(step,out) << std::endl; if( step % measure_interval == 0){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " @@ -193,8 +193,8 @@ void WilsonFlow::smear_adaptive(GaugeField& out, const GaugeField& in, Re //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; evolve_step_adaptive(out, maxTau); std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " - << energyDensityPlaquette(out) << std::endl; + << step << " " << taus << " " + << energyDensityPlaquette(out) << std::endl; if( step % measure_interval == 0){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " "