From 85b2bd4c93314b759b426278f314ed4dec195abd Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 7 Oct 2025 16:11:06 -0400 Subject: [PATCH] Beginnings of S2xR --- Grid/cartesian/Cartesian.h | 1 + Grid/cartesian/CartesianCrossIcosahedron.h | 197 +++++++ Grid/cartesian/Cartesian_base.h | 13 +- Grid/cshift/Cshift_mpi.h | 2 + Grid/cshift/Cshift_none.h | 1 + Grid/lattice/Lattice_base.h | 42 +- Grid/lattice/Lattice_coordinate.h | 86 +++- Grid/lattice/Lattice_peekpoke.h | 132 ++++- .../fermion/TwoSpinWilsonFermion3plus1D.h | 175 +++++++ Grid/qcd/action/fermion/TwoSpinWilsonImpl.h | 222 ++++++++ .../qcd/action/fermion/TwoSpinWilsonKernels.h | 84 +++ ...woSpinWilsonFermion3plus1DImplementation.h | 486 ++++++++++++++++++ .../TwoSpinWilsonKernelsImplementation.h | 441 ++++++++++++++++ ...woSpinWilsonFermion3plus1DInstantiation.cc | 61 +++ ...ilsonFermion3plus1DInstantiation.cc.master | 40 ++ ...woSpinWilsonKernelsInstantiation.cc.master | 40 ++ Grid/qcd/spin/Pauli.h | 220 ++++++++ TODO | 5 + tests/debug/Test_icosahedron.cc | 93 ++++ 19 files changed, 2324 insertions(+), 17 deletions(-) create mode 100644 Grid/cartesian/CartesianCrossIcosahedron.h create mode 100644 Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h create mode 100644 Grid/qcd/action/fermion/TwoSpinWilsonImpl.h create mode 100644 Grid/qcd/action/fermion/TwoSpinWilsonKernels.h create mode 100644 Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h create mode 100644 Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h create mode 100644 Grid/qcd/action/fermion/instantiation/TwoSpinWilsonFermion3plus1DInstantiation.cc create mode 100644 Grid/qcd/action/fermion/instantiation/TwoSpinWilsonFermion3plus1DInstantiation.cc.master create mode 100644 Grid/qcd/action/fermion/instantiation/TwoSpinWilsonKernelsInstantiation.cc.master create mode 100644 Grid/qcd/spin/Pauli.h create mode 100644 tests/debug/Test_icosahedron.cc diff --git a/Grid/cartesian/Cartesian.h b/Grid/cartesian/Cartesian.h index 070cad95..79fef8ec 100644 --- a/Grid/cartesian/Cartesian.h +++ b/Grid/cartesian/Cartesian.h @@ -31,5 +31,6 @@ Author: Peter Boyle #include #include #include +#include #endif diff --git a/Grid/cartesian/CartesianCrossIcosahedron.h b/Grid/cartesian/CartesianCrossIcosahedron.h new file mode 100644 index 00000000..76b17a3e --- /dev/null +++ b/Grid/cartesian/CartesianCrossIcosahedron.h @@ -0,0 +1,197 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/cartesian/CartesianCrossIcosahedron.h + + Copyright (C) 2025 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +///////////////////////////////////////////////////////////////////////////////////////// +// Grid Support. +///////////////////////////////////////////////////////////////////////////////////////// + +enum IcosahedralMeshType { + IcosahedralVertices, + IcosahedralEdges +} ; +enum NorthSouth { + North = 1, + South = 0 +}; + +const int num_icosahedron_tiles = 10; + +class GridCartesianCrossIcosahedron: public GridCartesian { + +public: + + IcosahedralMeshType meshType; + + IcosahedralMeshType MeshType(void) { return meshType; }; + + ///////////////////////////////////////////////////////////////////////// + // Constructor takes a parent grid and possibly subdivides communicator. + ///////////////////////////////////////////////////////////////////////// + /* + GridCartesian(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid, + const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) + { + assert(0); // No subdivision + } + GridCartesian(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid, + const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) + { + assert(0); // No subdivision + } + */ + ///////////////////////////////////////////////////////////////////////// + // Construct from comm world + ///////////////////////////////////////////////////////////////////////// + GridCartesianCrossIcosahedron(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid, + IcosahedralMeshType _meshType) : GridCartesian(dimensions,simd_layout,processor_grid) + { + meshType = _meshType; + Coordinate S2dimensions=dimensions; + Coordinate S2simd =simd_layout; + Coordinate S2procs =processor_grid; + + assert(simd_layout[0]==1); // Force simd into perpendicular dimensions + assert(simd_layout[1]==1); // to avoid pole storage complexity interacting with SIMD. + assert(dimensions[_ndimension-1]==num_icosahedron_tiles); + assert(processor_grid[_ndimension-1]<=2); // Keeps the patches that need a pole on the same node + + // allocate the pole storage if we are seeking vertex domain data + if ( meshType == IcosahedralVertices ) { + InitPoles(); + } + } + + virtual ~GridCartesianCrossIcosahedron() = default; + + //////////////////////////////////////////////// + // Use to decide if a given grid is icosahedral + //////////////////////////////////////////////// + int hasNorthPole; + int hasSouthPole; + int northPoleOsite; + int southPoleOsite; + int northPoleOsites; + int southPoleOsites; + + virtual int Icosahedral(void) override { return 1;} + virtual int ownsNorthPole(void) const override { return hasNorthPole; }; + virtual int NorthPoleOsite(void) const override { return northPoleOsite; }; + virtual int NorthPoleOsites(void) const override { return northPoleOsites; }; + virtual int ownsSouthPole(void) const override { return hasSouthPole; }; + virtual int SouthPoleOsite(void) const override { return southPoleOsite; }; + virtual int SouthPoleOsites(void) const override { return southPoleOsites; }; + + void InitPoles(void) + { + int Ndm1 = _ndimension-1; + /////////////////////// + // Add the extra pole storage + /////////////////////// + // Vertices = 1x LxLx D1...Dn + 2.D1...Dn + // Start after the LxL and don't include the 10 patch dim + int OrthogSize = 1; + for (int d = 2; d < Ndm1; d++) { + OrthogSize *= _gdimensions[d]; + } + _fsites += OrthogSize*2; + _gsites += OrthogSize*2; + + // Simd reduced sizes are multiplied up. + // If the leading LxL are simd-ized, the vector objects will contain "redundant" lanes + // which should contain identical north (south) pole data + OrthogSize = 1; + for (int d = 2; d < Ndm1; d++) { + OrthogSize *= _rdimensions[d]; + } + + // Grow the local volume to hold pole data + // on rank (0,0) in the LxL planes + // since SIMD must be placed in the orthogonal directions + Coordinate pcoor = this->ThisProcessorCoor(); + Coordinate pgrid = this->ProcessorGrid(); + + const int xdim=0; + const int ydim=1; + /* + * + * /\/\/\/\/\ + * /\/\/\/\/\/ + * \/\/\/\/\/ + * + * y + * / + * \x + * + * Labelling patches as 5 6 7 8 9 + * 0 1 2 3 4 + * + * Will ban distribution of the patch dimension by more than 2. + * + * Hence all 5 patches associated with the pole must have the + * appropriate "corner" of the patch L^2 located on the SAME rank. + */ + + if( (pcoor[xdim]==pgrid[xdim]-1) && (pcoor[ydim]==0) && (pcoor[Ndm1]==0) ){ + hasSouthPole =1; + southPoleOsite=this->_osites; + southPoleOsites=OrthogSize; + this->_osites += OrthogSize; + } else { + hasSouthPole =0; + southPoleOsites=0; + southPoleOsite=0; + } + if( (pcoor[xdim]==0) && (pcoor[ydim]==pgrid[ydim]-1) && (pcoor[Ndm1]==pgrid[Ndm1]-1) ){ + hasNorthPole =1; + northPoleOsite=this->_osites; + northPoleOsites=OrthogSize; + this->_osites += OrthogSize; + } else { + hasNorthPole =0; + northPoleOsites=0; + northPoleOsite=0; + } + std::cout << "Icosahedral vertex field volume " << this->_osites<southPoleOsite<northPoleOsite<southPoleOsites<northPoleOsites< Lattice Cshift(const Lattice &rhs,int dimension,int shift) { + assert(!rhs.Grid()->Icosahedral()); + typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_type scalar_type; diff --git a/Grid/cshift/Cshift_none.h b/Grid/cshift/Cshift_none.h index d3c0bfa0..79a360b4 100644 --- a/Grid/cshift/Cshift_none.h +++ b/Grid/cshift/Cshift_none.h @@ -30,6 +30,7 @@ Author: Peter Boyle NAMESPACE_BEGIN(Grid); template Lattice Cshift(const Lattice &rhs,int dimension,int shift) { + assert(!rhs.Grid()->Icosahedral()); Lattice ret(rhs.Grid()); ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension); Cshift_local(ret,rhs,dimension,shift); diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index d8b9a618..e6b0603f 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -373,14 +373,17 @@ public: template std::ostream& operator<< (std::ostream& stream, const Lattice &o){ typedef typename vobj::scalar_object sobj; - for(int64_t g=0;g_gsites;g++){ + uint64_t gsites=1; + uint64_t polesites=0; + for(int d=0;d_ndimension;d++) gsites *= o.Grid()->_gdimensions[d]; + for(int64_t g=0;gGlobalIndexToGlobalCoor(g,gcoor); sobj ss; peekSite(ss,o,gcoor); - stream<<"["; + stream<<"["<< g<<" : "; for(int d=0;d std::ostream& operator<< (std::ostream& stream, const Latti stream<<"]\t"; stream<Icosahedral() ) { + uint64_t psites=1; + Coordinate perpdims; + for(int d=2;d_ndimension-1;d++){ + int pd=o.Grid()->_gdimensions[d]; + psites*=pd; + perpdims.push_back(pd); + } + for(uint64_t p=0;p inline void LatticeCoordinate(Lattice &l,int mu) typedef typename iobj::scalar_type scalar_type; typedef typename iobj::vector_type vector_type; + l=Zero(); + GridBase *grid = l.Grid(); int Nsimd = grid->iSites(); - autoView(l_v, l, CpuWrite); - thread_for( o, grid->oSites(), { - vector_type vI; - Coordinate gcoor; - ExtractBuffer mergebuf(Nsimd); - for(int i=0;iiSites();i++){ - grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor); - mergebuf[i]=(Integer)gcoor[mu]; + int cartesian_vol = grid->oSites(); + if ( grid->Icosahedral() ) { + cartesian_vol = cartesian_vol - grid->NorthPoleOsites()-grid->SouthPoleOsites(); + } + { + autoView(l_v, l, CpuWrite); + thread_for( o, cartesian_vol, { + vector_type vI; + Coordinate gcoor; + ExtractBuffer mergebuf(Nsimd); + for(int i=0;iiSites();i++){ + grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor); + mergebuf[i]=(Integer)gcoor[mu]; + } + merge(vI,mergebuf); + l_v[o]=vI; + }); + } + + if (grid->Icosahedral()) { + uint64_t psites=1; + Coordinate perpdims; + typename iobj::scalar_object ss; + for(int d=2;d_ndimension-1;d++){ + int pd=grid->_gdimensions[d]; + psites*=pd; + perpdims.push_back(pd); } - merge(vI,mergebuf); - l_v[o]=vI; - }); + for(uint64_t p=0;p=2 && mu < grid->_ndimension-1) { + icoor = orthog[mu-2]; + } else { + icoor = -1; + } + + ss=scalar_type(icoor); + + pokePole(ss,l,orthog,South); + pokePole(ss,l,orthog,North); + } + } +}; +template inline void LatticePole(Lattice &l,NorthSouth pole) +{ + typedef typename iobj::scalar_object sobj; + typedef typename iobj::scalar_type scalar_type; + typedef typename iobj::vector_type vector_type; + + GridBase *grid = l.Grid(); + + l=Zero(); + + if (grid->Icosahedral()) { + uint64_t psites=1; + Coordinate perpdims; + sobj ss; + scalar_type one(1.0); + ss=one; + for(int d=2;d_ndimension-1;d++){ + int pd=l.Grid()->_gdimensions[d]; + psites*=pd; + perpdims.push_back(pd); + } + for(uint64_t p=0;p &l,const Coordinate &site){ grid->GlobalCoorToRankIndex(rank,odx,idx,site); ExtractBuffer buf(Nsimd); - autoView( l_v , l, CpuWrite); + autoView( l_v , l, CpuRead); extract(l_v[odx],buf); s = buf[idx]; @@ -151,6 +151,134 @@ void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ return; }; +// zero for south pole, one for north pole +template +void peekPole(sobj &s,const Lattice &l,const Coordinate &orthog,NorthSouth isNorth) +{ + s=Zero(); + + GridBase *grid=l.Grid(); + + assert(grid->Icosahedral()); + + int Nsimd = grid->Nsimd(); + + int rank; + + int Ndm1 = grid->_ndimension-1; + Coordinate pgrid = grid->ProcessorGrid(); + const int xdim=0; + const int ydim=1; + const int pdim=Ndm1; + + int64_t pole_osite; + int64_t pole_isite; + Coordinate rdims; + Coordinate idims; + Coordinate ocoor; + Coordinate icoor; + Coordinate pcoor(grid->_ndimension); + for(int d=2;d_rdimensions[d]); + idims.push_back(grid->_simd_layout[d]); + icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]); + ocoor.push_back(orthog[dd]%grid->_rdimensions[d]); + pcoor[d] = orthog[dd]/grid->_ldimensions[d]; + } + Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims); + Lexicographic::IndexFromCoor(icoor,pole_isite,idims); + + int64_t osite; + if(isNorth == North){ + pcoor[xdim] = 0; + pcoor[ydim] = pgrid[ydim]-1; + pcoor[Ndm1] = pgrid[Ndm1]-1; + osite = pole_osite + grid->NorthPoleOsite(); + } else { + pcoor[xdim] = pgrid[xdim]-1; + pcoor[ydim] = 0; + pcoor[Ndm1] = 0; + osite = pole_osite + grid->SouthPoleOsite(); + } + + rank = grid->RankFromProcessorCoor(pcoor); + + if ( rank == grid->ThisRank() ) { + ExtractBuffer buf(Nsimd); + autoView( l_v , l, CpuWrite); + extract(l_v[osite],buf); + s = buf[pole_isite]; + } + grid->Broadcast(rank,s); + + return; +}; +template +void pokePole(const sobj &s,Lattice &l,const Coordinate &orthog,NorthSouth isNorth) +{ + GridBase *grid=l.Grid(); + + assert(grid->Icosahedral()); + + grid->Broadcast(grid->BossRank(),s); + + int Nsimd = grid->Nsimd(); + int rank; + int Ndm1 = grid->_ndimension-1; + Coordinate pgrid = grid->ProcessorGrid(); + const int xdim=0; + const int ydim=1; + const int pdim=Ndm1; + + int64_t pole_osite; + int64_t pole_isite; + Coordinate rdims; + Coordinate idims; + Coordinate ocoor; + Coordinate icoor; + Coordinate pcoor(grid->_ndimension,0); + for(int d=2;d_rdimensions[d]); + idims.push_back(grid->_simd_layout[d]); + icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]); + ocoor.push_back(orthog[dd]%grid->_rdimensions[d]); + pcoor[d] = orthog[dd]/grid->_ldimensions[d]; + + int o = orthog[dd]; + int r = grid->_rdimensions[d]; + int omr = o % r; + } + Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims); + Lexicographic::IndexFromCoor(icoor,pole_isite,idims); + + int64_t osite; + if(isNorth ==North){ + pcoor[xdim] = 0; + pcoor[ydim] = pgrid[ydim]-1; + pcoor[Ndm1] = pgrid[Ndm1]-1; + osite = pole_osite + grid->NorthPoleOsite(); + } else { + pcoor[xdim] = pgrid[xdim]-1; + pcoor[ydim] = 0; + pcoor[Ndm1] = 0; + osite = pole_osite + grid->SouthPoleOsite(); + } + + rank = grid->RankFromProcessorCoor(pcoor); + + // extract-modify-merge cycle is easiest way and this is not perf critical + if ( rank == grid->ThisRank() ) { + ExtractBuffer buf(Nsimd); + autoView( l_v , l, CpuWrite); + extract(l_v[osite],buf); + buf[pole_isite] = s; + merge(l_v[osite],buf); + } + return; +}; + ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// @@ -179,7 +307,7 @@ inline void peekLocalSite(sobj &s,const LatticeView &l,Coordinate &site) for(int w=0;w diff --git a/Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h b/Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h new file mode 100644 index 00000000..0a8cc02e --- /dev/null +++ b/Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h @@ -0,0 +1,175 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma one + +NAMESPACE_BEGIN(Grid); + +class TwoSpinWilsonFermion3plus1DStatic { +public: + // S-direction is INNERMOST and takes no part in the parity. + static const std::vector directions; + static const std::vector displacements; + static constexpr int npoint = 6; + static std::vector MakeDirections(void); + static std::vector MakeDisplacements(void); +}; + +template +class TwoSpinWilsonFermion3plus1D : public TwoSpinWilsonKernels, public TwoSpinWilsonFermion3plus1DStatic +{ +public: + INHERIT_IMPL_TYPES(Impl); + typedef TwoSpinWilsonKernels Kernels; + + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + + int Dirichlet; + Coordinate Block; + + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _ThreeDimGrid ;} + GridBase *GaugeRedBlackGrid(void) { return _ThreeDimRedBlackGrid ;} + GridBase *FermionGrid(void) { return _FourDimGrid;} + GridBase *FermionRedBlackGrid(void) { return _FourDimRedBlackGrid;} + + // full checkerboard operations; leave unimplemented as abstract for now + virtual void M (const FermionField &in, FermionField &out){assert(0);}; + virtual void Mdag (const FermionField &in, FermionField &out){assert(0);}; + + // half checkerboard operations; leave unimplemented as abstract for now + virtual void Meooe (const FermionField &in, FermionField &out); + virtual void Mooee (const FermionField &in, FermionField &out); + virtual void MooeeInv (const FermionField &in, FermionField &out); + + virtual void MeooeDag (const FermionField &in, FermionField &out); + virtual void MooeeDag (const FermionField &in, FermionField &out); + virtual void MooeeInvDag (const FermionField &in, FermionField &out); + virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac + virtual void MdirAll(const FermionField &in, std::vector &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac + + // These can be overridden by fancy 5d chiral action + virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + + // void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; + void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; + void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; + + // Implement hopping term non-hermitian hopping term; half cb or both + // Implement s-diagonal DW + void DW (const FermionField &in, FermionField &out,int dag); + void Dhop (const FermionField &in, FermionField &out,int dag); + void DhopOE(const FermionField &in, FermionField &out,int dag); + void DhopEO(const FermionField &in, FermionField &out,int dag); + + void DhopComms (const FermionField &in, FermionField &out); + void DhopCalc (const FermionField &in, FermionField &out,uint64_t *ids); + + // add a DhopComm + // -- suboptimal interface will presently trigger multiple comms. + void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); + void DhopDirAll(const FermionField &in,std::vector &out); + void DhopDirComms(const FermionField &in); + void DhopDirCalc(const FermionField &in, FermionField &out,int point); + + /////////////////////////////////////////////////////////////// + // New methods added + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl & st, + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag); + + void DhopInternal(StencilImpl & st, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); + + void DhopInternalOverlappedComms(StencilImpl & st, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); + + void DhopInternalSerialComms(StencilImpl & st, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); + + // Constructors + TwoSpinWilsonFermion3plus1D(GaugeField &_Umu, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + GridCartesian &ThreeDimGrid, + GridRedBlackCartesian &ThreeDimRedBlackGrid, + double _M5,const ImplParams &p= ImplParams()); + + virtual void DirichletBlock(const Coordinate & block) + { + } + + // DoubleStore + void ImportGauge(const GaugeField &_Umu); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// +public: + + // Add these to the support from Wilson + GridBase *_ThreeDimGrid; + GridBase *_ThreeDimRedBlackGrid; + GridBase *_FourDimGrid; + GridBase *_FourDimRedBlackGrid; + + double M5; + int Ls; + + //Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + +}; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/TwoSpinWilsonImpl.h b/Grid/qcd/action/fermion/TwoSpinWilsonImpl.h new file mode 100644 index 00000000..cde08b81 --- /dev/null +++ b/Grid/qcd/action/fermion/TwoSpinWilsonImpl.h @@ -0,0 +1,222 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + + +///////////////////////////////////////////////////////////////////////////// +// Single flavour four spinors with colour index +///////////////////////////////////////////////////////////////////////////// +template +class TwoSpinWilsonImpl : public PeriodicGaugeImpl > { +public: + + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; + + typedef PeriodicGaugeImpl > Gimpl; + INHERIT_GIMPL_TYPES(Gimpl); + + //Necessary? + constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} + + typedef typename Options::_Coeff_t Coeff_t; + + template using iImplSpinor = iScalar, Nhs> >; + template using iImplPropagator = iScalar, Nhs> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplHalfCommSpinor = iScalar, Nhs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; + + typedef iImplSpinor SiteSpinor; + typedef iImplPropagator SitePropagator; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplHalfCommSpinor SiteHalfCommSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + + typedef Lattice FermionField; + typedef Lattice PropagatorField; + typedef Lattice DoubledGaugeField; + + typedef SimpleCompressor Compressor; + typedef WilsonImplParams ImplParams; + typedef CartesianStencil StencilImpl; + typedef const typename StencilImpl::View_type StencilView; + + ImplParams Params; + + TwoSpinWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){ + }; + + template + static accelerator_inline void multLink(_Spinor &phi, + const SiteDoubledGaugeField &U, + const _Spinor &chi, + int mu) + { + auto UU = coalescedRead(U(mu)); + mult(&phi(), &UU, &chi()); + } + template + static accelerator_inline void multLink(_Spinor &phi, + const SiteDoubledGaugeField &U, + const _Spinor &chi, + int mu, + StencilEntry *SE, + StencilView &St) + { + multLink(phi,U,chi,mu); + } + + template + inline void multLinkField(_SpinorField & out, + const DoubledGaugeField &Umu, + const _SpinorField & phi, + int mu) + { + const int Nsimd = SiteHalfSpinor::Nsimd(); + autoView( out_v, out, AcceleratorWrite); + autoView( phi_v, phi, AcceleratorRead); + autoView( Umu_v, Umu, AcceleratorRead); + typedef decltype(coalescedRead(out_v[0])) calcSpinor; + accelerator_for(sss,out.Grid()->oSites(),Nsimd,{ + calcSpinor tmp; + multLink(tmp,Umu_v[sss],phi_v(sss),mu); + coalescedWrite(out_v[sss],tmp); + }); + } + + template + static accelerator_inline void loadLinkElement(Simd ®, ref &memory) + { + reg = memory; + } + + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &Uds, + const GaugeField &Umu) + { + typedef typename Simd::scalar_type scalar_type; + + conformable(Uds.Grid(), GaugeGrid); + conformable(Umu.Grid(), GaugeGrid); + + GaugeLinkField U(GaugeGrid); + GaugeLinkField tmp(GaugeGrid); + + Lattice > coor(GaugeGrid); + //////////////////////////////////////////////////// + // apply any boundary phase or twists + //////////////////////////////////////////////////// + for (int mu = 0; mu < Nd; mu++) { + + ////////// boundary phase ///////////// + auto pha = Params.boundary_phases[mu]; + scalar_type phase( real(pha),imag(pha) ); + + int L = GaugeGrid->GlobalDimensions()[mu]; + int Lmu = L - 1; + + LatticeCoordinate(coor, mu); + + U = PeekIndex(Umu, mu); + + // apply any twists + RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L; + if ( theta != 0.0) { + scalar_type twphase(::cos(theta),::sin(theta)); + U = twphase*U; + std::cout << GridLogMessage << " Twist ["<(Uds, tmp, mu); + + U = adj(Cshift(U, mu, -1)); + U = where(coor == 0, conjugate(phase) * U, U); + PokeIndex(Uds, U, mu + Nd); + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ + GaugeLinkField link(mat.Grid()); + link = TraceIndex(outerProduct(Btilde,A)); + PokeIndex(mat,link,mu); + } + + inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){ + mat = outerProduct(B,A); + } + + inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { + mat = TraceIndex(P); + } + + inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds) + { + for (int mu = 0; mu < Nd; mu++) + mat[mu] = PeekIndex(Uds, mu); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu) + { + int Ls=Btilde.Grid()->_fdimensions[0]; + autoView( mat_v , mat, AcceleratorWrite); + { + const int Nsimd = SiteSpinor::Nsimd(); + autoView( Btilde_v , Btilde, AcceleratorRead); + autoView( Atilde_v , Atilde, AcceleratorRead); + accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{ + int sU=sss; + typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; + ColorMatrixType sum; + zeroit(sum); + for(int s=0;s TwoSpinWilsonImplR; // Real.. whichever prec +typedef TwoSpinWilsonImpl TwoSpinWilsonImplF; // Float +typedef TwoSpinWilsonImpl TwoSpinWilsonImplD; // Double +typedef TwoSpinWilsonImpl TwoSpinWilsonImplD2; // Double + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/TwoSpinWilsonKernels.h b/Grid/qcd/action/fermion/TwoSpinWilsonKernels.h new file mode 100644 index 00000000..5f843a81 --- /dev/null +++ b/Grid/qcd/action/fermion/TwoSpinWilsonKernels.h @@ -0,0 +1,84 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.h + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Helper routines that implement Wilson stencil for a single site. +// Common to both the WilsonFermion and WilsonFermion5D +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template class TwoSpinWilsonKernels : public FermionOperator { +public: + + INHERIT_IMPL_TYPES(Impl); + typedef FermionOperator Base; + typedef AcceleratorVector StencilVector; +public: + + static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + int interior=1,int exterior=1) ; + + static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + uint64_t *ids); + + static void DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + int interior=1,int exterior=1) ; + + static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls, + int Nsite, const FermionField &in, std::vector &out) ; + + static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma); + +private: + + static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); + + static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); + static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); + static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); + static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); + static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); + static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); + + public: + TwoSpinWilsonKernels(const ImplParams &p = ImplParams()) : Base(p){}; +}; + +NAMESPACE_END(Grid); + + diff --git a/Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h b/Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h new file mode 100644 index 00000000..ef499ce6 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h @@ -0,0 +1,486 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion2plus1D.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + + // 5d lattice for DWF. +template +TwoSpinWilsonFermion3plus15D::TwoSpinWilsonFermion3plus1D(GaugeField &_Umu, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + GridCartesian &ThreeDimGrid, + GridRedBlackCartesian &ThreeDimRedBlackGrid, + RealD _M5,const ImplParams &p) : + Kernels(p), + _FourDimGrid (&FourDimGrid), + _FourDimRedBlackGrid(&FourDimRedBlackGrid), + _ThreeDimGrid (&ThreeDimGrid), + _ThreeDimRedBlackGrid(&ThreeDimRedBlackGrid), + Stencil (_FourDimGrid,npoint,Even,directions,displacements,p), + StencilEven(_FourDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even + StencilOdd (_FourDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd + M5(_M5), + Umu(_ThreeDimGrid), + UmuEven(_ThreeDimRedBlackGrid), + UmuOdd (_ThreeDimRedBlackGrid), + _tmp(&FourDimRedBlackGrid), + Dirichlet(0) +{ + // some assertions + assert(FourDimGrid._ndimension==Nd+1); + assert(ThreeDimGrid._ndimension==Nd); + assert(ThreeDimRedBlackGrid._ndimension==Nd); + assert(FourDimRedBlackGrid._ndimension==Nd+1); + assert(FourDimRedBlackGrid._checker_dim==1); // Don't checker the s direction + + // extent of fifth dim and not spread out + Ls=FourDimGrid._fdimensions[0]; + assert(FourDimRedBlackGrid._fdimensions[0]==Ls); + assert(FourDimGrid._processors[0] ==1); + assert(FourDimRedBlackGrid._processors[0] ==1); + + // Other dimensions must match the decomposition of the four-D fields + for(int d=0;d +void TwoSpinWilsonFermion3plus1D::ImportGauge(const GaugeField &_Umu) +{ + GaugeField HUmu(_Umu.Grid()); + HUmu = _Umu*(-0.5); + Impl::DoubleStore(GaugeGrid(),Umu,HUmu); + pickCheckerboard(Even,UmuEven,Umu); + pickCheckerboard(Odd ,UmuOdd,Umu); +} +template +void TwoSpinWilsonFermion3plus1D::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp) +{ + int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil + // we drop off the innermost fifth dimension + // assert( (disp==1)||(disp==-1) ); + // assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; + + int skip = (disp==1) ? 0 : 1; + int dirdisp = dir+skip*Nd; + int gamma = dir+(1-skip)*Nd; + + Compressor compressor(DaggerNo); + Stencil.HaloExchange(in,compressor); + + uint64_t Nsite = Umu.Grid()->oSites(); + Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma); + +}; +template +void TwoSpinWilsonFermion3plus1D::DhopDirAll(const FermionField &in, std::vector &out) +{ + Compressor compressor(DaggerNo); + Stencil.HaloExchange(in,compressor); + uint64_t Nsite = Umu.Grid()->oSites(); + Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out); +}; + + +template +void TwoSpinWilsonFermion3plus1D::DerivInternal(StencilImpl & st, + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) +{ + assert((dag==DaggerNo) ||(dag==DaggerYes)); + + conformable(st.Grid(),A.Grid()); + conformable(st.Grid(),B.Grid()); + + Compressor compressor(dag); + + FermionField Btilde(B.Grid()); + FermionField Atilde(B.Grid()); + + st.HaloExchange(B,compressor); + + Atilde=A; + int LLs = B.Grid()->_rdimensions[0]; + + + for (int mu = 0; mu < Nd; mu++) { + //////////////////////////////////////////////////////////////////////// + // Flip gamma if dag + //////////////////////////////////////////////////////////////////////// + int gamma = mu; + if (!dag) gamma += Nd; + + //////////////////////// + // Call the single hop + //////////////////////// + + int Usites = U.Grid()->oSites(); + + Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma); + + //////////////////////////// + // spin trace outer product + //////////////////////////// + Impl::InsertForce5D(mat, Btilde, Atilde, mu); + } +} + +template +void TwoSpinWilsonFermion3plus1D::DhopDeriv(GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) +{ + conformable(A.Grid(),FermionGrid()); + conformable(A.Grid(),B.Grid()); + + //conformable(GaugeGrid(),mat.Grid());// this is not general! leaving as a comment + + mat.Checkerboard() = A.Checkerboard(); + // mat.checkerboard = A.checkerboard; + + DerivInternal(Stencil,Umu,mat,A,B,dag); +} + +template +void TwoSpinWilsonFermion3plus1D::DhopDerivEO(GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) +{ + conformable(A.Grid(),FermionRedBlackGrid()); + conformable(A.Grid(),B.Grid()); + + assert(B.Checkerboard()==Odd); + assert(A.Checkerboard()==Even); + mat.Checkerboard() = Even; + + DerivInternal(StencilOdd,UmuEven,mat,A,B,dag); +} + + +template +void TwoSpinWilsonFermion3plus1D::DhopDerivOE(GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) +{ + conformable(A.Grid(),FermionRedBlackGrid()); + conformable(A.Grid(),B.Grid()); + + assert(B.Checkerboard()==Even); + assert(A.Checkerboard()==Odd); + mat.Checkerboard() = Odd; + + DerivInternal(StencilEven,UmuOdd,mat,A,B,dag); +} + +template +void TwoSpinWilsonFermion3plus1D::DhopInternal(StencilImpl & st, + DoubledGaugeField & U, + const FermionField &in, FermionField &out,int dag) +{ + DhopInternalSerialComms(st,U,in,out,dag); +} + + +template +void TwoSpinWilsonFermion3plus1D::DhopInternalOverlappedComms(StencilImpl & st, + DoubledGaugeField & U, + const FermionField &in, FermionField &out,int dag) +{ + GRID_TRACE("DhopInternalOverlappedComms"); + Compressor compressor(dag); + + int LLs = in.Grid()->_rdimensions[0]; + int len = U.Grid()->oSites(); + + ///////////////////////////// + // Start comms // Gather intranode and extra node differentiated?? + ///////////////////////////// + { + // std::cout << " TwoSpinWilsonFermion3plus1D gather " < > requests; + +#if 1 + ///////////////////////////// + // Overlap with comms + ///////////////////////////// + st.CommunicateBegin(requests); + st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms +#endif + + ///////////////////////////// + // do the compute interior + ///////////////////////////// + if (dag == DaggerYes) { + GRID_TRACE("DhopDagInterior"); + Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); + } else { + GRID_TRACE("DhopInterior"); + Kernels::DhopKernel (st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); + } + + //ifdef GRID_ACCELERATED +#if 0 + ///////////////////////////// + // Overlap with comms -- on GPU the interior kernel call is nonblocking + ///////////////////////////// + st.CommunicateBegin(requests); + st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms +#endif + + + ///////////////////////////// + // Complete comms + ///////////////////////////// + // std::cout << " TwoSpinWilsonFermion3plus1D Comms Complete " < +void TwoSpinWilsonFermion3plus1D::DhopInternalSerialComms(StencilImpl & st, + DoubledGaugeField & U, + const FermionField &in, + FermionField &out,int dag) +{ + GRID_TRACE("DhopInternalSerialComms"); + Compressor compressor(dag); + + int LLs = in.Grid()->_rdimensions[0]; + + // std::cout << " TwoSpinWilsonFermion3plus1D Halo exch " < +void TwoSpinWilsonFermion3plus1D::DhopOE(const FermionField &in, FermionField &out,int dag) +{ + conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid + conformable(in.Grid(),out.Grid()); // drops the cb check + + assert(in.Checkerboard()==Even); + out.Checkerboard() = Odd; + + DhopInternal(StencilEven,UmuOdd,in,out,dag); +} +template +void TwoSpinWilsonFermion3plus1D::DhopEO(const FermionField &in, FermionField &out,int dag) +{ + conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid + conformable(in.Grid(),out.Grid()); // drops the cb check + + assert(in.Checkerboard()==Odd); + out.Checkerboard() = Even; + + DhopInternal(StencilOdd,UmuEven,in,out,dag); +} +template +void TwoSpinWilsonFermion3plus1D::DhopComms(const FermionField &in, FermionField &out) +{ + int dag =0 ; + conformable(in.Grid(),FermionGrid()); // verifies full grid + conformable(in.Grid(),out.Grid()); + out.Checkerboard() = in.Checkerboard(); + Compressor compressor(dag); + Stencil.HaloExchangeOpt(in,compressor); +} +template +void TwoSpinWilsonFermion3plus1D::DhopCalc(const FermionField &in, FermionField &out,uint64_t *ids) +{ + conformable(in.Grid(),FermionGrid()); // verifies full grid + conformable(in.Grid(),out.Grid()); + + out.Checkerboard() = in.Checkerboard(); + + int LLs = in.Grid()->_rdimensions[0]; + Kernels::DhopKernel(Stencil,Umu,Stencil.CommBuf(),LLs,Umu.oSites(),in,out,ids); +} + +template +void TwoSpinWilsonFermion3plus1D::Dhop(const FermionField &in, FermionField &out,int dag) +{ + conformable(in.Grid(),FermionGrid()); // verifies full grid + conformable(in.Grid(),out.Grid()); + + out.Checkerboard() = in.Checkerboard(); + + DhopInternal(Stencil,Umu,in,out,dag); +} +template +void TwoSpinWilsonFermion3plus1D::DW(const FermionField &in, FermionField &out,int dag) +{ + out.Checkerboard()=in.Checkerboard(); + Dhop(in,out,dag); // -0.5 is included + axpy(out,Nd*1.0-M5,in,out); +} +template +void TwoSpinWilsonFermion3plus1D::Meooe(const FermionField &in, FermionField &out) +{ + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); + } +} + +template +void TwoSpinWilsonFermion3plus1D::MeooeDag(const FermionField &in, FermionField &out) +{ + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); + } +} + +template +void TwoSpinWilsonFermion3plus1D::Mooee(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + typename FermionField::scalar_type scal(Nd*1.0 + M5); + out = scal * in; +} + +template +void TwoSpinWilsonFermion3plus1D::MooeeDag(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + Mooee(in, out); +} + +template +void TwoSpinWilsonFermion3plus1D::MooeeInv(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + out = (1.0/(Nd*1.0 + M5))*in; +} + +template +void TwoSpinWilsonFermion3plus1D::MooeeInvDag(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + MooeeInv(in,out); +} + +NAMESPACE_END(Grid); + + + + diff --git a/Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h new file mode 100644 index 00000000..6957cac6 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h @@ -0,0 +1,441 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/TwoSpinWilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include + +NAMESPACE_BEGIN(Grid); + + +//////////////////////////////////////////// +// Generic implementation; move to different file? +//////////////////////////////////////////// + +#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \ + SE = st.GetEntry(ptype, Dir, sF); \ + if (SE->_is_local) { \ + int perm= SE->_permute; \ + auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ + spProj(chi,tmp); \ + } else { \ + chi = coalescedRead(buf[SE->_offset],lane); \ + } \ + acceleratorSynchronise(); \ + Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ + Recon(result, Uchi); + +#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \ + SE = st.GetEntry(ptype, Dir, sF); \ + if (SE->_is_local) { \ + int perm= SE->_permute; \ + auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ + spProj(chi,tmp); \ + Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ + Recon(result, Uchi); \ + } \ + acceleratorSynchronise(); + +#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \ + SE = st.GetEntry(ptype, Dir, sF); \ + if (!SE->_is_local ) { \ + auto chi = coalescedRead(buf[SE->_offset],lane); \ + Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ + Recon(result, Uchi); \ + nmu++; \ + } \ + acceleratorSynchronise(); + +#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \ + if (SE->_is_local ) { \ + int perm= SE->_permute; \ + auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ + spProj(chi,tmp); \ + } else { \ + chi = coalescedRead(buf[SE->_offset],lane); \ + } \ + acceleratorSynchronise(); \ + Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \ + Recon(result, Uchi); + +#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \ + if (gamma == Dir) { \ + GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \ + } + +//////////////////////////////////////////////////////////////////// +// All legs kernels ; comms then compute +//////////////////////////////////////////////////////////////////// +template accelerator_inline +void TwoSpinWilsonKernels::DhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, + SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcSpinor chi; + calcSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + const int Nsimd = SiteSpinor::Nsimd(); + const int lane=acceleratorSIMTlane(Nsimd); + GENERIC_STENCIL_LEG(Xp,pauliProjXp,pauliAssign); + GENERIC_STENCIL_LEG(Yp,pauliProjYp,pauliAdd); + GENERIC_STENCIL_LEG(Zp,pauliProjZp,pauliAdd); + GENERIC_STENCIL_LEG(Xm,pauliProjXm,pauliAdd); + GENERIC_STENCIL_LEG(Ym,pauliProjYm,pauliAdd); + GENERIC_STENCIL_LEG(Zm,pauliProjZm,pauliAdd); + coalescedWrite(out[sF],result,lane); +}; + +template accelerator_inline +void TwoSpinWilsonKernels::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, + SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcSpinor chi; + // calcSpinor *chi_p; + calcSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + + const int Nsimd = SiteSpinor::Nsimd(); + const int lane=acceleratorSIMTlane(Nsimd); + GENERIC_STENCIL_LEG(Xm,pauliProjXp,pauliAssign); + GENERIC_STENCIL_LEG(Ym,pauliProjYp,pauliAdd); + GENERIC_STENCIL_LEG(Zm,pauliProjZp,pauliAdd); + GENERIC_STENCIL_LEG(Xp,pauliProjXm,pauliAdd); + GENERIC_STENCIL_LEG(Yp,pauliProjYm,pauliAdd); + GENERIC_STENCIL_LEG(Zp,pauliProjZm,pauliAdd); + coalescedWrite(out[sF], result,lane); +}; + //////////////////////////////////////////////////////////////////// + // Interior kernels + //////////////////////////////////////////////////////////////////// +template accelerator_inline +void TwoSpinWilsonKernels::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, + SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcSpinor chi; + // calcSpinor *chi_p; + calcSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + const int Nsimd = SiteSpinor::Nsimd(); + const int lane=acceleratorSIMTlane(Nsimd); + + result=Zero(); + GENERIC_STENCIL_LEG_INT(Xp,pauliProjXp,pauliAdd); + GENERIC_STENCIL_LEG_INT(Yp,pauliProjYp,pauliAdd); + GENERIC_STENCIL_LEG_INT(Zp,pauliProjZp,pauliAdd); + GENERIC_STENCIL_LEG_INT(Xm,pauliProjXm,pauliAdd); + GENERIC_STENCIL_LEG_INT(Ym,pauliProjYm,pauliAdd); + GENERIC_STENCIL_LEG_INT(Zm,pauliProjZm,pauliAdd); + coalescedWrite(out[sF], result,lane); +}; + +template accelerator_inline +void TwoSpinWilsonKernels::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, + SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(in[0])) calcSpinor; + const int Nsimd = SiteSpinor::Nsimd(); + const int lane=acceleratorSIMTlane(Nsimd); + + calcSpinor chi; + // calcSpinor *chi_p; + calcSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + result=Zero(); + GENERIC_STENCIL_LEG_INT(Xm,pauliProjXp,pauliAdd); + GENERIC_STENCIL_LEG_INT(Ym,pauliProjYp,pauliAdd); + GENERIC_STENCIL_LEG_INT(Zm,pauliProjZp,pauliAdd); + GENERIC_STENCIL_LEG_INT(Xp,pauliProjXm,pauliAdd); + GENERIC_STENCIL_LEG_INT(Yp,pauliProjYm,pauliAdd); + GENERIC_STENCIL_LEG_INT(Zp,pauliProjZm,pauliAdd); + coalescedWrite(out[sF], result,lane); +}; +//////////////////////////////////////////////////////////////////// +// Exterior kernels +//////////////////////////////////////////////////////////////////// +template accelerator_inline +void TwoSpinWilsonKernels::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, + SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(in[0])) calcSpinor; + // calcSpinor *chi_p; + calcSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + int nmu=0; + const int Nsimd = SiteSpinor::Nsimd(); + const int lane=acceleratorSIMTlane(Nsimd); + result=Zero(); + GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXp,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYp,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZp,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXm,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYm,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZm,pauliAdd); + if ( nmu ) { + auto out_t = coalescedRead(out[sF],lane); + out_t = out_t + result; + coalescedWrite(out[sF],out_t,lane); + } +}; + +template accelerator_inline +void TwoSpinWilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, + SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(in[0])) calcSpinor; + // calcSpinor *chi_p; + calcSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + int nmu=0; + const int Nsimd = SiteSpinor::Nsimd(); + const int lane=acceleratorSIMTlane(Nsimd); + result=Zero(); + GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXp,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYp,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZp,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXm,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYm,pauliAdd); + GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZm,pauliAdd); + if ( nmu ) { + auto out_t = coalescedRead(out[sF],lane); + out_t = out_t + result; + coalescedWrite(out[sF],out_t,lane); + } +}; + +#define DhopDirMacro(Dir,spProj,spRecon) \ + template accelerator_inline \ + void TwoSpinWilsonKernels::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF, \ + int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \ + { \ + typedef decltype(coalescedRead(in[0])) calcSpinor; \ + calcSpinor chi; \ + calcSpinor result; \ + calcSpinor Uchi; \ + StencilEntry *SE; \ + int ptype; \ + const int Nsimd = SiteSpinor::Nsimd(); \ + const int lane=acceleratorSIMTlane(Nsimd); \ + \ + SE = st.GetEntry(ptype, dir, sF); \ + GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \ + coalescedWrite(out[sF], result,lane); \ + } + +DhopDirMacro(Xp,pauliProjXp,pauliAssign); +DhopDirMacro(Yp,pauliProjYp,pauliAssign); +DhopDirMacro(Zp,pauliProjZp,pauliAssign); +DhopDirMacro(Xm,pauliProjXm,pauliAssign); +DhopDirMacro(Ym,pauliProjYm,pauliAssign); +DhopDirMacro(Zm,pauliProjZm,pauliAssign); + +template accelerator_inline +void TwoSpinWilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma) +{ + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcSpinor chi; + calcSpinor result; + calcSpinor Uchi; + StencilEntry *SE; + int ptype; + const int Nsimd = SiteSpinor::Nsimd(); + const int lane=acceleratorSIMTlane(Nsimd); + + SE = st.GetEntry(ptype, dir, sF); + GENERIC_DHOPDIR_LEG(Xp,pauliProjXp,pauliAssign); + GENERIC_DHOPDIR_LEG(Yp,pauliProjYp,pauliAssign); + GENERIC_DHOPDIR_LEG(Zp,pauliProjZp,pauliAssign); + GENERIC_DHOPDIR_LEG(Xm,pauliProjXm,pauliAssign); + GENERIC_DHOPDIR_LEG(Ym,pauliProjYm,pauliAssign); + GENERIC_DHOPDIR_LEG(Zm,pauliProjZm,pauliAssign); + coalescedWrite(out[sF], result,lane); +} + +template +void TwoSpinWilsonKernels::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls, + int Nsite, const FermionField &in, std::vector &out) +{ + autoView(U_v ,U,AcceleratorRead); + autoView(in_v ,in,AcceleratorRead); + autoView(st_v ,st,AcceleratorRead); + + autoView(out_Xm,out[0],AcceleratorWrite); + autoView(out_Ym,out[1],AcceleratorWrite); + autoView(out_Zm,out[2],AcceleratorWrite); + autoView(out_Xp,out[4],AcceleratorWrite); + autoView(out_Yp,out[5],AcceleratorWrite); + autoView(out_Zp,out[6],AcceleratorWrite); + auto CBp=st.CommBuf(); + accelerator_for(sss,Nsite*Ls,Simd::Nsimd(),{ + int sU=sss/Ls; + int sF =sss; + DhopDirXm(st_v,U_v,CBp,sF,sU,in_v,out_Xm,0); + DhopDirYm(st_v,U_v,CBp,sF,sU,in_v,out_Ym,1); + DhopDirZm(st_v,U_v,CBp,sF,sU,in_v,out_Zm,2); + DhopDirXp(st_v,U_v,CBp,sF,sU,in_v,out_Xp,3); + DhopDirYp(st_v,U_v,CBp,sF,sU,in_v,out_Yp,4); + DhopDirZp(st_v,U_v,CBp,sF,sU,in_v,out_Zp,5); + }); +} + + +template +void TwoSpinWilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls, + int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma) +{ + assert(dirdisp<=5); + assert(dirdisp>=0); + + autoView(U_v ,U ,AcceleratorRead); + autoView(in_v ,in ,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v ,st ,AcceleratorRead); + auto CBp=st.CommBuf(); +#define LoopBody(Dir) \ + case Dir : \ + accelerator_for(ss,Nsite,Simd::Nsimd(),{ \ + for(int s=0;s::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ + }); + +#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); + +#define KERNEL_CALL_EXT(A) \ + const uint64_t sz = st.surface_list.size(); \ + auto ptr = &st.surface_list[0]; \ + accelerator_forNB( ss, sz, Simd::Nsimd(), { \ + int sF = ptr[ss]; \ + int sU = sF/Ls; \ + TwoSpinWilsonKernels::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ + }); \ + accelerator_barrier(); + + +template +void TwoSpinWilsonKernels::DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + int interior,int exterior) +{ + autoView(U_v , U,AcceleratorRead); + autoView(in_v , in,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v , st,AcceleratorRead); + + if( interior && exterior ) { + acceleratorFenceComputeStream(); + KERNEL_CALL(GenericDhopSite); + return; + } else if( interior ) { + KERNEL_CALLNB(GenericDhopSiteInt); + return; + } else if( exterior ) { + // // dependent on result of merge + acceleratorFenceComputeStream(); + KERNEL_CALL_EXT(GenericDhopSiteExt); + return; + } + assert(0 && " Kernel optimisation case not covered "); +} + +template +void TwoSpinWilsonKernels::DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + int interior,int exterior) +{ + autoView(U_v ,U,AcceleratorRead); + autoView(in_v ,in,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v ,st,AcceleratorRead); + + if( interior && exterior ) { + acceleratorFenceComputeStream(); + KERNEL_CALL(GenericDhopSiteDag); + return; + } else if( interior ) { + KERNEL_CALLNB(GenericDhopSiteDagInt); return; + } else if( exterior ) { + // Dependent on result of merge + acceleratorFenceComputeStream(); + KERNEL_CALL_EXT(GenericDhopSiteDagExt); return; + } + assert(0 && " Kernel optimisation case not covered "); +} + +#undef KERNEL_CALLNB +#undef KERNEL_CALL + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/TwoSpinWilsonFermion3plus1DInstantiation.cc b/Grid/qcd/action/fermion/instantiation/TwoSpinWilsonFermion3plus1DInstantiation.cc new file mode 100644 index 00000000..fc388914 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/TwoSpinWilsonFermion3plus1DInstantiation.cc @@ -0,0 +1,61 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +// S-direction is INNERMOST and takes no part in the parity. + +const std::vector TwoSpinWilsonFermion3plus1DStatic::directions (TwoSpinWilsonFermion3plus1DStatic::MakeDirections()); +const std::vector TwoSpinWilsonFermion3plus1DStatic::displacements(TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements()); + +std::vector TwoSpinWilsonFermion3plus1DStatic::MakeDirections (void) +{ + std::vector directions(2*Nd); + for(int d=0;d TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements(void) +{ + std::vector displacements(2*Nd); + for(int d=0;d +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class TwoSpinWilsonFermion3plus1D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/TwoSpinWilsonKernelsInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/TwoSpinWilsonKernelsInstantiation.cc.master new file mode 100644 index 00000000..630b5c4a --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/TwoSpinWilsonKernelsInstantiation.cc.master @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015, 2020 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: Nils Meyer Regensburg University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class TwoSpinWilsonKernels; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/spin/Pauli.h b/Grid/qcd/spin/Pauli.h new file mode 100644 index 00000000..fac66375 --- /dev/null +++ b/Grid/qcd/spin/Pauli.h @@ -0,0 +1,220 @@ +#ifndef GRID_QCD_PAULI_H +#define GRID_QCD_PAULI_H + +#include + +NAMESPACE_BEGIN(Grid); +// +/* + * Pauli basis + * sx sy sz ident + * (0 1) , (0 -i) , ( 1 0 ) + * (1 0) (i 0) ( 0 -1) + * + * These are hermitian. + * + * Also supply wilson "projectors" (1+/-sx), (1+/-sy), (1+/-sz) + * + * spPauliProjXm + * spPauliProjYm etc... + */ +class Pauli { + public: + GRID_SERIALIZABLE_ENUM(Algebra, undef, + SigmaX , 0, + MinusSigmaX , 1, + SigmaY , 2, + MinusSigmaY , 3, + SigmaZ , 4, + MinusSigmaZ , 5, + Identity , 6, + MinusIdentity , 7); + + static constexpr unsigned int nPauli = 8; + static const std::array name; + static const std::array, nPauli> mul; + static const std::array adj; + static const std::array gmu; + static const std::array gall; + Algebra g; + public: + accelerator Pauli(Algebra initg): g(initg) {} +}; + +#define CopyImplementation(iTemplate,multPauli,multFlavour) \ + template \ + accelerator_inline void multPauli(iTemplate &ret, const iTemplate &rhs) { \ + multFlavour(ret,rhs); \ +} + +CopyImplementation(iVector,multPauliSigmaX,multFlavourSigmaX); +CopyImplementation(iMatrix,lmultPauliSigmaX,lmultFlavourSigmaX); +CopyImplementation(iMatrix,rmultPauliSigmaX,rmultFlavourSigmaX); + +CopyImplementation(iVector,multPauliMinusSigmaX ,multFlavourMinusSigmaX); +CopyImplementation(iMatrix,lmultPauliMinusSigmaX,lmultFlavourMinusSigmaX); +CopyImplementation(iMatrix,rmultPauliMinusSigmaX,rmultFlavourMinusSigmaX); + +CopyImplementation(iVector,multPauliSigmaY,multFlavourSigmaY); +CopyImplementation(iMatrix,lmultPauliSigmaY,lmultFlavourSigmaY); +CopyImplementation(iMatrix,rmultPauliSigmaY,rmultFlavourSigmaY); + +CopyImplementation(iVector,multPauliMinusSigmaY ,multFlavourMinusSigmaY); +CopyImplementation(iMatrix,lmultPauliMinusSigmaY,lmultFlavourMinusSigmaY); +CopyImplementation(iMatrix,rmultPauliMinusSigmaY,rmultFlavourMinusSigmaY); + +CopyImplementation(iVector,multPauliSigmaZ,multFlavourSigmaZ); +CopyImplementation(iMatrix,lmultPauliSigmaZ,lmultFlavourSigmaZ); +CopyImplementation(iMatrix,rmultPauliSigmaZ,rmultFlavourSigmaZ); + +CopyImplementation(iVector,multPauliMinusSigmaZ ,multFlavourMinusSigmaZ); +CopyImplementation(iMatrix,lmultPauliMinusSigmaZ,lmultFlavourMinusSigmaZ); +CopyImplementation(iMatrix,rmultPauliMinusSigmaZ,rmultFlavourMinusSigmaZ); + +CopyImplementation(iVector,multPauliIdentity,multFlavourIdentity); +CopyImplementation(iMatrix,lmultPauliIdentity,lmultFlavourIdentity); +CopyImplementation(iMatrix,rmultPauliIdentity,rmultFlavourIdentity); + +CopyImplementation(iVector,multPauliMinusIdentity ,multFlavourMinusIdentity); +CopyImplementation(iMatrix,lmultPauliMinusIdentity,lmultFlavourMinusIdentity); +CopyImplementation(iMatrix,rmultPauliMinusIdentity,rmultFlavourMinusIdentity); + +/* + * sx sy sz ident + * (0 1) , (0 -i) , ( 1 0 ) + * (1 0) (i 0) ( 0 -1) + */ +template > = 0> accelerator_inline void pauliProjXp (iVector &hspin,const iVector &fspin) +{ + hspin(0)=fspin(0)+fspin(1); + hspin(1)=fspin(1)+fspin(0); +} +template > = 0> accelerator_inline void pauliProjXm (iVector &hspin,const iVector &fspin) +{ + hspin(0)=fspin(0)-fspin(1); + hspin(1)=fspin(1)-fspin(0); +} + +template > = 0> accelerator_inline void pauliProjYp (iVector &hspin,const iVector &fspin) +{ + hspin(0)=fspin(0)-timesI(fspin(1)); + hspin(1)=fspin(1)+timesI(fspin(0)); +} +template > = 0> accelerator_inline void pauliProjYm (iVector &hspin,const iVector &fspin) +{ + hspin(0)=fspin(0)+timesI(fspin(1)); + hspin(1)=fspin(1)-timesI(fspin(0)); +} +template > = 0> accelerator_inline void pauliProjZp (iVector &hspin,const iVector &fspin) +{ + hspin(0)=fspin(0)+fspin(0); + hspin(1)=Zero(); +} +template > = 0> accelerator_inline void pauliProjZm (iVector &hspin,const iVector &fspin) +{ + hspin(0)=Zero(); + hspin(1)=fspin(1)+fspin(1); +} +template > = 0> accelerator_inline void pauliAssign(iVector &fspin,const iVector &hspin) +{ + fspin = hspin; +} +template > = 0> accelerator_inline void pauliAdd (iVector &fspin,const iVector &hspin) +{ + fspin = fspin + hspin; +} + +template +accelerator_inline auto operator*(const Pauli &G, const iVector &arg) +->typename std::enable_if, PauliIndex>::value, iVector>::type +{ + iVector ret; + + switch (G.g) + { + case Pauli::Algebra::SigmaX: + multPauliSigmaX(ret, arg); break; + case Pauli::Algebra::MinusSigmaX: + multPauliMinusSigmaX(ret, arg); break; + case Pauli::Algebra::SigmaY: + multPauliSigmaY(ret, arg); break; + case Pauli::Algebra::MinusSigmaY: + multPauliMinusSigmaY(ret, arg); break; + case Pauli::Algebra::SigmaZ: + multPauliSigmaZ(ret, arg); break; + case Pauli::Algebra::MinusSigmaZ: + multPauliMinusSigmaZ(ret, arg); break; + case Pauli::Algebra::Identity: + multPauliIdentity(ret, arg); break; + case Pauli::Algebra::MinusIdentity: + multPauliMinusIdentity(ret, arg); break; + default: assert(0); + } + + return ret; +} + +template +accelerator_inline auto operator*(const Pauli &G, const iMatrix &arg) +->typename std::enable_if, PauliIndex>::value, iMatrix>::type +{ + iMatrix ret; + + switch (G.g) + { + case Pauli::Algebra::SigmaX: + lmultPauliSigmaX(ret, arg); break; + case Pauli::Algebra::MinusSigmaX: + lmultPauliMinusSigmaX(ret, arg); break; + case Pauli::Algebra::SigmaY: + lmultPauliSigmaY(ret, arg); break; + case Pauli::Algebra::MinusSigmaY: + lmultPauliMinusSigmaY(ret, arg); break; + case Pauli::Algebra::SigmaZ: + lmultPauliSigmaZ(ret, arg); break; + case Pauli::Algebra::MinusSigmaZ: + lmultPauliMinusSigmaZ(ret, arg); break; + case Pauli::Algebra::Identity: + lmultPauliIdentity(ret, arg); break; + case Pauli::Algebra::MinusIdentity: + lmultPauliMinusIdentity(ret, arg); break; + default: assert(0); + } + + return ret; +} + +template +accelerator_inline auto operator*(const iMatrix &arg, const Pauli &G) +->typename std::enable_if, PauliIndex>::value, iMatrix>::type +{ + iMatrix ret; + + switch (G.g) + { + case Pauli::Algebra::SigmaX: + rmultPauliSigmaX(ret, arg); break; + case Pauli::Algebra::MinusSigmaX: + rmultPauliMinusSigmaX(ret, arg); break; + case Pauli::Algebra::SigmaY: + rmultPauliSigmaY(ret, arg); break; + case Pauli::Algebra::MinusSigmaY: + rmultPauliMinusSigmaY(ret, arg); break; + case Pauli::Algebra::SigmaZ: + rmultPauliSigmaZ(ret, arg); break; + case Pauli::Algebra::MinusSigmaZ: + rmultPauliMinusSigmaZ(ret, arg); break; + case Pauli::Algebra::Identity: + rmultPauliIdentity(ret, arg); break; + case Pauli::Algebra::MinusIdentity: + rmultPauliMinusIdentity(ret, arg); break; + default: assert(0); + } + + return ret; +} + + +NAMESPACE_END(Grid); + +#endif // GRID_QCD_GAMMA_H diff --git a/TODO b/TODO index d10d5793..12573208 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,8 @@ + +* Clean up the extract merge and replace with insertLane/extractLane + +----- + i) Refine subspace with HDCG & recompute ii) Block Lanczos in coarse space iii) Batched block project in the operator computation diff --git a/tests/debug/Test_icosahedron.cc b/tests/debug/Test_icosahedron.cc new file mode 100644 index 00000000..cab6a281 --- /dev/null +++ b/tests/debug/Test_icosahedron.cc @@ -0,0 +1,93 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/debug/Test_icosahedron.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +const int MyNd=3; +template using iIcosahedralLorentzComplex = iVector >, MyNd+1 > ; + +typedef iIcosahedralLorentzComplex IcosahedralLorentzComplexD; +typedef iIcosahedralLorentzComplex vIcosahedralLorentzComplexD; +typedef Lattice LatticeIcosahedralLorentzComplexD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int L=8; + const int Npatch = num_icosahedron_tiles; + + // Put SIMD all in time direction + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout({1,1,vComplexD::Nsimd(),1}); + Coordinate mpi_layout = GridDefaultMpi(); + + std::cout << GridLogMessage << " mpi "<