1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-13 12:44:42 +01:00

Beginnings of S2xR

This commit is contained in:
Peter Boyle
2025-10-07 16:11:06 -04:00
parent 35e10a1159
commit 85b2bd4c93
19 changed files with 2324 additions and 17 deletions

View File

@@ -31,5 +31,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/cartesian/Cartesian_base.h> #include <Grid/cartesian/Cartesian_base.h>
#include <Grid/cartesian/Cartesian_full.h> #include <Grid/cartesian/Cartesian_full.h>
#include <Grid/cartesian/Cartesian_red_black.h> #include <Grid/cartesian/Cartesian_red_black.h>
#include <Grid/cartesian/CartesianCrossIcosahedron.h>
#endif #endif

View File

@@ -0,0 +1,197 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/cartesian/CartesianCrossIcosahedron.h
Copyright (C) 2025
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////
// Grid Support.
/////////////////////////////////////////////////////////////////////////////////////////
enum IcosahedralMeshType {
IcosahedralVertices,
IcosahedralEdges
} ;
enum NorthSouth {
North = 1,
South = 0
};
const int num_icosahedron_tiles = 10;
class GridCartesianCrossIcosahedron: public GridCartesian {
public:
IcosahedralMeshType meshType;
IcosahedralMeshType MeshType(void) { return meshType; };
/////////////////////////////////////////////////////////////////////////
// Constructor takes a parent grid and possibly subdivides communicator.
/////////////////////////////////////////////////////////////////////////
/*
GridCartesian(const Coordinate &dimensions,
const Coordinate &simd_layout,
const Coordinate &processor_grid,
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
{
assert(0); // No subdivision
}
GridCartesian(const Coordinate &dimensions,
const Coordinate &simd_layout,
const Coordinate &processor_grid,
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
{
assert(0); // No subdivision
}
*/
/////////////////////////////////////////////////////////////////////////
// Construct from comm world
/////////////////////////////////////////////////////////////////////////
GridCartesianCrossIcosahedron(const Coordinate &dimensions,
const Coordinate &simd_layout,
const Coordinate &processor_grid,
IcosahedralMeshType _meshType) : GridCartesian(dimensions,simd_layout,processor_grid)
{
meshType = _meshType;
Coordinate S2dimensions=dimensions;
Coordinate S2simd =simd_layout;
Coordinate S2procs =processor_grid;
assert(simd_layout[0]==1); // Force simd into perpendicular dimensions
assert(simd_layout[1]==1); // to avoid pole storage complexity interacting with SIMD.
assert(dimensions[_ndimension-1]==num_icosahedron_tiles);
assert(processor_grid[_ndimension-1]<=2); // Keeps the patches that need a pole on the same node
// allocate the pole storage if we are seeking vertex domain data
if ( meshType == IcosahedralVertices ) {
InitPoles();
}
}
virtual ~GridCartesianCrossIcosahedron() = default;
////////////////////////////////////////////////
// Use to decide if a given grid is icosahedral
////////////////////////////////////////////////
int hasNorthPole;
int hasSouthPole;
int northPoleOsite;
int southPoleOsite;
int northPoleOsites;
int southPoleOsites;
virtual int Icosahedral(void) override { return 1;}
virtual int ownsNorthPole(void) const override { return hasNorthPole; };
virtual int NorthPoleOsite(void) const override { return northPoleOsite; };
virtual int NorthPoleOsites(void) const override { return northPoleOsites; };
virtual int ownsSouthPole(void) const override { return hasSouthPole; };
virtual int SouthPoleOsite(void) const override { return southPoleOsite; };
virtual int SouthPoleOsites(void) const override { return southPoleOsites; };
void InitPoles(void)
{
int Ndm1 = _ndimension-1;
///////////////////////
// Add the extra pole storage
///////////////////////
// Vertices = 1x LxLx D1...Dn + 2.D1...Dn
// Start after the LxL and don't include the 10 patch dim
int OrthogSize = 1;
for (int d = 2; d < Ndm1; d++) {
OrthogSize *= _gdimensions[d];
}
_fsites += OrthogSize*2;
_gsites += OrthogSize*2;
// Simd reduced sizes are multiplied up.
// If the leading LxL are simd-ized, the vector objects will contain "redundant" lanes
// which should contain identical north (south) pole data
OrthogSize = 1;
for (int d = 2; d < Ndm1; d++) {
OrthogSize *= _rdimensions[d];
}
// Grow the local volume to hold pole data
// on rank (0,0) in the LxL planes
// since SIMD must be placed in the orthogonal directions
Coordinate pcoor = this->ThisProcessorCoor();
Coordinate pgrid = this->ProcessorGrid();
const int xdim=0;
const int ydim=1;
/*
*
* /\/\/\/\/\
* /\/\/\/\/\/
* \/\/\/\/\/
*
* y
* /
* \x
*
* Labelling patches as 5 6 7 8 9
* 0 1 2 3 4
*
* Will ban distribution of the patch dimension by more than 2.
*
* Hence all 5 patches associated with the pole must have the
* appropriate "corner" of the patch L^2 located on the SAME rank.
*/
if( (pcoor[xdim]==pgrid[xdim]-1) && (pcoor[ydim]==0) && (pcoor[Ndm1]==0) ){
hasSouthPole =1;
southPoleOsite=this->_osites;
southPoleOsites=OrthogSize;
this->_osites += OrthogSize;
} else {
hasSouthPole =0;
southPoleOsites=0;
southPoleOsite=0;
}
if( (pcoor[xdim]==0) && (pcoor[ydim]==pgrid[ydim]-1) && (pcoor[Ndm1]==pgrid[Ndm1]-1) ){
hasNorthPole =1;
northPoleOsite=this->_osites;
northPoleOsites=OrthogSize;
this->_osites += OrthogSize;
} else {
hasNorthPole =0;
northPoleOsites=0;
northPoleOsite=0;
}
std::cout << "Icosahedral vertex field volume " << this->_osites<<std::endl;
std::cout << "Icosahedral south pole offset " << this->southPoleOsite<<std::endl;
std::cout << "Icosahedral north pole offset " << this->northPoleOsite<<std::endl;
std::cout << "Icosahedral south pole size " << this->southPoleOsites<<std::endl;
std::cout << "Icosahedral north pole size " << this->northPoleOsites<<std::endl;
};
};
NAMESPACE_END(Grid);

View File

@@ -86,11 +86,20 @@ public:
public: public:
// Icosahedral decisions
virtual int Icosahedral(void) { return 0;}
virtual int ownsNorthPole(void) const { return 0; };
virtual int ownsSouthPole(void) const { return 0; };
virtual int NorthPoleOsite(void) const { return 0; };
virtual int SouthPoleOsite(void) const { return 0; };
virtual int NorthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
virtual int SouthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// Checkerboarding interface is virtual and overridden by // Checkerboarding interface is virtual and overridden by
// GridCartesian / GridRedBlackCartesian // GridCartesian / GridRedBlackCartesian
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
virtual int Icosahedral(void) { return 0;}
virtual int CheckerBoarded(int dim) =0; virtual int CheckerBoarded(int dim) =0;
virtual int CheckerBoard(const Coordinate &site)=0; virtual int CheckerBoard(const Coordinate &site)=0;
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
@@ -177,6 +186,8 @@ public:
} }
return permute_type; return permute_type;
} }
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// Array sizing queries // Array sizing queries
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////

View File

@@ -34,6 +34,8 @@ NAMESPACE_BEGIN(Grid);
const int Cshift_verbose=0; const int Cshift_verbose=0;
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift) template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
{ {
assert(!rhs.Grid()->Icosahedral());
typedef typename vobj::vector_type vector_type; typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;

View File

@@ -30,6 +30,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift) template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
{ {
assert(!rhs.Grid()->Icosahedral());
Lattice<vobj> ret(rhs.Grid()); Lattice<vobj> ret(rhs.Grid());
ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension); ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
Cshift_local(ret,rhs,dimension,shift); Cshift_local(ret,rhs,dimension,shift);

View File

@@ -373,14 +373,17 @@ public:
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
for(int64_t g=0;g<o.Grid()->_gsites;g++){ uint64_t gsites=1;
uint64_t polesites=0;
for(int d=0;d<o.Grid()->_ndimension;d++) gsites *= o.Grid()->_gdimensions[d];
for(int64_t g=0;g<gsites;g++){
Coordinate gcoor; Coordinate gcoor;
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor); o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
sobj ss; sobj ss;
peekSite(ss,o,gcoor); peekSite(ss,o,gcoor);
stream<<"["; stream<<"["<< g<<" : ";
for(int d=0;d<gcoor.size();d++){ for(int d=0;d<gcoor.size();d++){
stream<<gcoor[d]; stream<<gcoor[d];
if(d!=gcoor.size()-1) stream<<","; if(d!=gcoor.size()-1) stream<<",";
@@ -388,6 +391,41 @@ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Latti
stream<<"]\t"; stream<<"]\t";
stream<<ss<<std::endl; stream<<ss<<std::endl;
} }
if ( o.Grid()->Icosahedral() ) {
uint64_t psites=1;
Coordinate perpdims;
for(int d=2;d<o.Grid()->_ndimension-1;d++){
int pd=o.Grid()->_gdimensions[d];
psites*=pd;
perpdims.push_back(pd);
}
for(uint64_t p=0;p<psites;p++){
sobj ss;
Coordinate orthog;
Lexicographic::CoorFromIndex(orthog,p,perpdims);
peekPole(ss,o,orthog,South);
stream<<"[ SouthPole : ";
for(int d=0;d<orthog.size();d++){
stream<<orthog[d];
if(d!=orthog.size()-1) stream<<",";
}
stream<<"]\t";
stream<<ss<<std::endl;
}
for(uint64_t p=0;p<psites;p++){
sobj ss;
Coordinate orthog;
Lexicographic::CoorFromIndex(orthog,p,perpdims);
peekPole(ss,o,orthog,North);
stream<<"[ NorthPole : ";
for(int d=0;d<orthog.size();d++){
stream<<orthog[d];
if(d!=orthog.size()-1) stream<<",";
}
stream<<"]\t";
stream<<ss<<std::endl;
}
}
return stream; return stream;
} }

View File

@@ -34,22 +34,84 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
typedef typename iobj::scalar_type scalar_type; typedef typename iobj::scalar_type scalar_type;
typedef typename iobj::vector_type vector_type; typedef typename iobj::vector_type vector_type;
l=Zero();
GridBase *grid = l.Grid(); GridBase *grid = l.Grid();
int Nsimd = grid->iSites(); int Nsimd = grid->iSites();
autoView(l_v, l, CpuWrite); int cartesian_vol = grid->oSites();
thread_for( o, grid->oSites(), { if ( grid->Icosahedral() ) {
vector_type vI; cartesian_vol = cartesian_vol - grid->NorthPoleOsites()-grid->SouthPoleOsites();
Coordinate gcoor; }
ExtractBuffer<scalar_type> mergebuf(Nsimd); {
for(int i=0;i<grid->iSites();i++){ autoView(l_v, l, CpuWrite);
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor); thread_for( o, cartesian_vol, {
mergebuf[i]=(Integer)gcoor[mu]; vector_type vI;
Coordinate gcoor;
ExtractBuffer<scalar_type> mergebuf(Nsimd);
for(int i=0;i<grid->iSites();i++){
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
mergebuf[i]=(Integer)gcoor[mu];
}
merge<vector_type,scalar_type>(vI,mergebuf);
l_v[o]=vI;
});
}
if (grid->Icosahedral()) {
uint64_t psites=1;
Coordinate perpdims;
typename iobj::scalar_object ss;
for(int d=2;d<grid->_ndimension-1;d++){
int pd=grid->_gdimensions[d];
psites*=pd;
perpdims.push_back(pd);
} }
merge<vector_type,scalar_type>(vI,mergebuf); for(uint64_t p=0;p<psites;p++){
l_v[o]=vI; Coordinate orthog;
}); Lexicographic::CoorFromIndex(orthog,p,perpdims);
int icoor;
if ( mu>=2 && mu < grid->_ndimension-1) {
icoor = orthog[mu-2];
} else {
icoor = -1;
}
ss=scalar_type(icoor);
pokePole(ss,l,orthog,South);
pokePole(ss,l,orthog,North);
}
}
};
template<class iobj> inline void LatticePole(Lattice<iobj> &l,NorthSouth pole)
{
typedef typename iobj::scalar_object sobj;
typedef typename iobj::scalar_type scalar_type;
typedef typename iobj::vector_type vector_type;
GridBase *grid = l.Grid();
l=Zero();
if (grid->Icosahedral()) {
uint64_t psites=1;
Coordinate perpdims;
sobj ss;
scalar_type one(1.0);
ss=one;
for(int d=2;d<l.Grid()->_ndimension-1;d++){
int pd=l.Grid()->_gdimensions[d];
psites*=pd;
perpdims.push_back(pd);
}
for(uint64_t p=0;p<psites;p++){
Coordinate orthog;
Lexicographic::CoorFromIndex(orthog,p,perpdims);
pokePole(ss,l,orthog,pole);
}
}
}; };
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@@ -141,7 +141,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
grid->GlobalCoorToRankIndex(rank,odx,idx,site); grid->GlobalCoorToRankIndex(rank,odx,idx,site);
ExtractBuffer<sobj> buf(Nsimd); ExtractBuffer<sobj> buf(Nsimd);
autoView( l_v , l, CpuWrite); autoView( l_v , l, CpuRead);
extract(l_v[odx],buf); extract(l_v[odx],buf);
s = buf[idx]; s = buf[idx];
@@ -151,6 +151,134 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
return; return;
}; };
// zero for south pole, one for north pole
template<class vobj,class sobj>
void peekPole(sobj &s,const Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
{
s=Zero();
GridBase *grid=l.Grid();
assert(grid->Icosahedral());
int Nsimd = grid->Nsimd();
int rank;
int Ndm1 = grid->_ndimension-1;
Coordinate pgrid = grid->ProcessorGrid();
const int xdim=0;
const int ydim=1;
const int pdim=Ndm1;
int64_t pole_osite;
int64_t pole_isite;
Coordinate rdims;
Coordinate idims;
Coordinate ocoor;
Coordinate icoor;
Coordinate pcoor(grid->_ndimension);
for(int d=2;d<Ndm1;d++){
int dd=d-2;
rdims.push_back(grid->_rdimensions[d]);
idims.push_back(grid->_simd_layout[d]);
icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
pcoor[d] = orthog[dd]/grid->_ldimensions[d];
}
Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
int64_t osite;
if(isNorth == North){
pcoor[xdim] = 0;
pcoor[ydim] = pgrid[ydim]-1;
pcoor[Ndm1] = pgrid[Ndm1]-1;
osite = pole_osite + grid->NorthPoleOsite();
} else {
pcoor[xdim] = pgrid[xdim]-1;
pcoor[ydim] = 0;
pcoor[Ndm1] = 0;
osite = pole_osite + grid->SouthPoleOsite();
}
rank = grid->RankFromProcessorCoor(pcoor);
if ( rank == grid->ThisRank() ) {
ExtractBuffer<sobj> buf(Nsimd);
autoView( l_v , l, CpuWrite);
extract(l_v[osite],buf);
s = buf[pole_isite];
}
grid->Broadcast(rank,s);
return;
};
template<class vobj,class sobj>
void pokePole(const sobj &s,Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
{
GridBase *grid=l.Grid();
assert(grid->Icosahedral());
grid->Broadcast(grid->BossRank(),s);
int Nsimd = grid->Nsimd();
int rank;
int Ndm1 = grid->_ndimension-1;
Coordinate pgrid = grid->ProcessorGrid();
const int xdim=0;
const int ydim=1;
const int pdim=Ndm1;
int64_t pole_osite;
int64_t pole_isite;
Coordinate rdims;
Coordinate idims;
Coordinate ocoor;
Coordinate icoor;
Coordinate pcoor(grid->_ndimension,0);
for(int d=2;d<Ndm1;d++){
int dd = d-2;
rdims.push_back(grid->_rdimensions[d]);
idims.push_back(grid->_simd_layout[d]);
icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
pcoor[d] = orthog[dd]/grid->_ldimensions[d];
int o = orthog[dd];
int r = grid->_rdimensions[d];
int omr = o % r;
}
Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
int64_t osite;
if(isNorth ==North){
pcoor[xdim] = 0;
pcoor[ydim] = pgrid[ydim]-1;
pcoor[Ndm1] = pgrid[Ndm1]-1;
osite = pole_osite + grid->NorthPoleOsite();
} else {
pcoor[xdim] = pgrid[xdim]-1;
pcoor[ydim] = 0;
pcoor[Ndm1] = 0;
osite = pole_osite + grid->SouthPoleOsite();
}
rank = grid->RankFromProcessorCoor(pcoor);
// extract-modify-merge cycle is easiest way and this is not perf critical
if ( rank == grid->ThisRank() ) {
ExtractBuffer<sobj> buf(Nsimd);
autoView( l_v , l, CpuWrite);
extract(l_v[osite],buf);
buf[pole_isite] = s;
merge(l_v[osite],buf);
}
return;
};
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// Peek a scalar object from the SIMD array // Peek a scalar object from the SIMD array
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
@@ -179,7 +307,7 @@ inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
for(int w=0;w<words;w++){ for(int w=0;w<words;w++){
pt[w] = getlane(vp[w],idx); pt[w] = getlane(vp[w],idx);
} }
// std::cout << "peekLocalSite "<<site<<" "<<odx<<","<<idx<<" "<<s<<std::endl;
return; return;
}; };
template<class vobj,class sobj> template<class vobj,class sobj>

View File

@@ -0,0 +1,175 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma one
NAMESPACE_BEGIN(Grid);
class TwoSpinWilsonFermion3plus1DStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static const std::vector<int> directions;
static const std::vector<int> displacements;
static constexpr int npoint = 6;
static std::vector<int> MakeDirections(void);
static std::vector<int> MakeDisplacements(void);
};
template<class Impl>
class TwoSpinWilsonFermion3plus1D : public TwoSpinWilsonKernels<Impl>, public TwoSpinWilsonFermion3plus1DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef TwoSpinWilsonKernels<Impl> Kernels;
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
int Dirichlet;
Coordinate Block;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _ThreeDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _ThreeDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FourDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FourDimRedBlackGrid;}
// full checkerboard operations; leave unimplemented as abstract for now
virtual void M (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdag (const FermionField &in, FermionField &out){assert(0);};
// half checkerboard operations; leave unimplemented as abstract for now
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
void DW (const FermionField &in, FermionField &out,int dag);
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
void DhopComms (const FermionField &in, FermionField &out);
void DhopCalc (const FermionField &in, FermionField &out,uint64_t *ids);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
void DhopDirComms(const FermionField &in);
void DhopDirCalc(const FermionField &in, FermionField &out,int point);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalSerialComms(StencilImpl & st,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
// Constructors
TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
GridCartesian &ThreeDimGrid,
GridRedBlackCartesian &ThreeDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
virtual void DirichletBlock(const Coordinate & block)
{
}
// DoubleStore
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
// Add these to the support from Wilson
GridBase *_ThreeDimGrid;
GridBase *_ThreeDimRedBlackGrid;
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
double M5;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
};
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,222 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
class TwoSpinWilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
typedef typename Options::_Coeff_t Coeff_t;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<Simd> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef SimpleCompressor<SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
typedef const typename StencilImpl::View_type StencilView;
ImplParams Params;
TwoSpinWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
};
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu)
{
auto UU = coalescedRead(U(mu));
mult(&phi(), &UU, &chi());
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu,
StencilEntry *SE,
StencilView &St)
{
multLink(phi,U,chi,mu);
}
template<class _SpinorField>
inline void multLinkField(_SpinorField & out,
const DoubledGaugeField &Umu,
const _SpinorField & phi,
int mu)
{
const int Nsimd = SiteHalfSpinor::Nsimd();
autoView( out_v, out, AcceleratorWrite);
autoView( phi_v, phi, AcceleratorRead);
autoView( Umu_v, Umu, AcceleratorRead);
typedef decltype(coalescedRead(out_v[0])) calcSpinor;
accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
calcSpinor tmp;
multLink(tmp,Umu_v[sss],phi_v(sss),mu);
coalescedWrite(out_v[sss],tmp);
});
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds,
const GaugeField &Umu)
{
typedef typename Simd::scalar_type scalar_type;
conformable(Uds.Grid(), GaugeGrid);
conformable(Umu.Grid(), GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField tmp(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
////////////////////////////////////////////////////
// apply any boundary phase or twists
////////////////////////////////////////////////////
for (int mu = 0; mu < Nd; mu++) {
////////// boundary phase /////////////
auto pha = Params.boundary_phases[mu];
scalar_type phase( real(pha),imag(pha) );
int L = GaugeGrid->GlobalDimensions()[mu];
int Lmu = L - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu);
// apply any twists
RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
if ( theta != 0.0) {
scalar_type twphase(::cos(theta),::sin(theta));
U = twphase*U;
std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
}
tmp = where(coor == Lmu, phase * U, U);
PokeIndex<LorentzIndex>(Uds, tmp, mu);
U = adj(Cshift(U, mu, -1));
U = where(coor == 0, conjugate(phase) * U, U);
PokeIndex<LorentzIndex>(Uds, U, mu + Nd);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat.Grid());
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){
mat = outerProduct(B,A);
}
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
mat = TraceIndex<SpinIndex>(P);
}
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
{
for (int mu = 0; mu < Nd; mu++)
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu)
{
int Ls=Btilde.Grid()->_fdimensions[0];
autoView( mat_v , mat, AcceleratorWrite);
{
const int Nsimd = SiteSpinor::Nsimd();
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
int sU=sss;
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
ColorMatrixType sum;
zeroit(sum);
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
for(int spn=0;spn<Ns;spn++){ //sum over spin
auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
auto aa = coalescedRead(Atilde_v[sF]()(spn) );
auto op = outerProduct(bb,aa);
sum = sum + op;
}
}
coalescedWrite(mat_v[sU](mu)(), sum);
});
}
}
};
typedef TwoSpinWilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplR; // Real.. whichever prec
typedef TwoSpinWilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplF; // Float
typedef TwoSpinWilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplD; // Double
typedef TwoSpinWilsonImpl<vComplexD2, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplD2; // Double
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,84 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Impl> class TwoSpinWilsonKernels : public FermionOperator<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
typedef AcceleratorVector<int,STENCIL_MAX> StencilVector;
public:
static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
uint64_t *ids);
static void DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
private:
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
public:
TwoSpinWilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
};
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,486 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion2plus1D.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
#include <Grid/perfmon/PerfCount.h>
NAMESPACE_BEGIN(Grid);
// 5d lattice for DWF.
template<class Impl>
TwoSpinWilsonFermion3plus15D<Impl>::TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
GridCartesian &ThreeDimGrid,
GridRedBlackCartesian &ThreeDimRedBlackGrid,
RealD _M5,const ImplParams &p) :
Kernels(p),
_FourDimGrid (&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
_ThreeDimGrid (&ThreeDimGrid),
_ThreeDimRedBlackGrid(&ThreeDimRedBlackGrid),
Stencil (_FourDimGrid,npoint,Even,directions,displacements,p),
StencilEven(_FourDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
StencilOdd (_FourDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
M5(_M5),
Umu(_ThreeDimGrid),
UmuEven(_ThreeDimRedBlackGrid),
UmuOdd (_ThreeDimRedBlackGrid),
_tmp(&FourDimRedBlackGrid),
Dirichlet(0)
{
// some assertions
assert(FourDimGrid._ndimension==Nd+1);
assert(ThreeDimGrid._ndimension==Nd);
assert(ThreeDimRedBlackGrid._ndimension==Nd);
assert(FourDimRedBlackGrid._ndimension==Nd+1);
assert(FourDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
// extent of fifth dim and not spread out
Ls=FourDimGrid._fdimensions[0];
assert(FourDimRedBlackGrid._fdimensions[0]==Ls);
assert(FourDimGrid._processors[0] ==1);
assert(FourDimRedBlackGrid._processors[0] ==1);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<Nd;d++){
assert(FourDimGrid._processors[d+1] ==ThreeDimGrid._processors[d]);
assert(FourDimRedBlackGrid._processors[d+1] ==ThreeDimGrid._processors[d]);
assert(ThreeDimRedBlackGrid._processors[d] ==ThreeDimGrid._processors[d]);
assert(FourDimGrid._fdimensions[d+1] ==ThreeDimGrid._fdimensions[d]);
assert(FourDimRedBlackGrid._fdimensions[d+1]==ThreeDimGrid._fdimensions[d]);
assert(ThreeDimRedBlackGrid._fdimensions[d] ==ThreeDimGrid._fdimensions[d]);
assert(FourDimGrid._simd_layout[d+1] ==ThreeDimGrid._simd_layout[d]);
assert(FourDimRedBlackGrid._simd_layout[d+1]==ThreeDimGrid._simd_layout[d]);
assert(ThreeDimRedBlackGrid._simd_layout[d] ==ThreeDimGrid._simd_layout[d]);
}
if ( p.dirichlet.size() == Nd+1) {
Coordinate block = p.dirichlet;
for(int d=0;d<Nd+1;d++) {
if ( block[d] ){
Dirichlet = 1;
std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
Block = block;
}
}
} else {
Coordinate block(Nd+1,0);
Block = block;
}
// Dimension zero of the five-d is the Ls direction
assert(FourDimRedBlackGrid._simd_layout[0]==1);
assert(FourDimGrid._simd_layout[0] ==1);
// Allocate the required comms buffer
ImportGauge(_Umu);
// Build lists of exterior only nodes
int LLs = FourDimGrid._rdimensions[0];
int vol3;
vol3=ThreeDimGrid.oSites();
Stencil.BuildSurfaceList(LLs,vol3);
vol3=ThreeDimRedBlackGrid.oSites();
StencilEven.BuildSurfaceList(LLs,vol3);
StencilOdd.BuildSurfaceList(LLs,vol3);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::ImportGauge(const GaugeField &_Umu)
{
GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5);
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
pickCheckerboard(Even,UmuEven,Umu);
pickCheckerboard(Odd ,UmuOdd,Umu);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
{
int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
// we drop off the innermost fifth dimension
// assert( (disp==1)||(disp==-1) );
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
int skip = (disp==1) ? 0 : 1;
int dirdisp = dir+skip*Nd;
int gamma = dir+(1-skip)*Nd;
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in,compressor);
uint64_t Nsite = Umu.Grid()->oSites();
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
};
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
{
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in,compressor);
uint64_t Nsite = Umu.Grid()->oSites();
Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out);
};
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
assert((dag==DaggerNo) ||(dag==DaggerYes));
conformable(st.Grid(),A.Grid());
conformable(st.Grid(),B.Grid());
Compressor compressor(dag);
FermionField Btilde(B.Grid());
FermionField Atilde(B.Grid());
st.HaloExchange(B,compressor);
Atilde=A;
int LLs = B.Grid()->_rdimensions[0];
for (int mu = 0; mu < Nd; mu++) {
////////////////////////////////////////////////////////////////////////
// Flip gamma if dag
////////////////////////////////////////////////////////////////////////
int gamma = mu;
if (!dag) gamma += Nd;
////////////////////////
// Call the single hop
////////////////////////
int Usites = U.Grid()->oSites();
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
////////////////////////////
// spin trace outer product
////////////////////////////
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
}
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
conformable(A.Grid(),FermionGrid());
conformable(A.Grid(),B.Grid());
//conformable(GaugeGrid(),mat.Grid());// this is not general! leaving as a comment
mat.Checkerboard() = A.Checkerboard();
// mat.checkerboard = A.checkerboard;
DerivInternal(Stencil,Umu,mat,A,B,dag);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
conformable(A.Grid(),FermionRedBlackGrid());
conformable(A.Grid(),B.Grid());
assert(B.Checkerboard()==Odd);
assert(A.Checkerboard()==Even);
mat.Checkerboard() = Even;
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
{
conformable(A.Grid(),FermionRedBlackGrid());
conformable(A.Grid(),B.Grid());
assert(B.Checkerboard()==Even);
assert(A.Checkerboard()==Odd);
mat.Checkerboard() = Odd;
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternal(StencilImpl & st,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
DhopInternalSerialComms(st,U,in,out,dag);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
GRID_TRACE("DhopInternalOverlappedComms");
Compressor compressor(dag);
int LLs = in.Grid()->_rdimensions[0];
int len = U.Grid()->oSites();
/////////////////////////////
// Start comms // Gather intranode and extra node differentiated??
/////////////////////////////
{
// std::cout << " TwoSpinWilsonFermion3plus1D gather " <<std::endl;
GRID_TRACE("Gather");
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
}
// std::cout << " TwoSpinWilsonFermion3plus1D Communicate Begin " <<std::endl;
std::vector<std::vector<CommsRequest_t> > requests;
#if 1
/////////////////////////////
// Overlap with comms
/////////////////////////////
st.CommunicateBegin(requests);
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
#endif
/////////////////////////////
// do the compute interior
/////////////////////////////
if (dag == DaggerYes) {
GRID_TRACE("DhopDagInterior");
Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
} else {
GRID_TRACE("DhopInterior");
Kernels::DhopKernel (st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
}
//ifdef GRID_ACCELERATED
#if 0
/////////////////////////////
// Overlap with comms -- on GPU the interior kernel call is nonblocking
/////////////////////////////
st.CommunicateBegin(requests);
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
#endif
/////////////////////////////
// Complete comms
/////////////////////////////
// std::cout << " TwoSpinWilsonFermion3plus1D Comms Complete " <<std::endl;
st.CommunicateComplete(requests);
// traceStop(id);
/////////////////////////////
// do the compute exterior
/////////////////////////////
{
// std::cout << " TwoSpinWilsonFermion3plus1D Comms Merge " <<std::endl;
GRID_TRACE("Merge");
st.CommsMerge(compressor);
}
// std::cout << " TwoSpinWilsonFermion3plus1D Exterior " <<std::endl;
if (dag == DaggerYes) {
GRID_TRACE("DhopDagExterior");
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
} else {
GRID_TRACE("DhopExterior");
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
}
// std::cout << " TwoSpinWilsonFermion3plus1D Done " <<std::endl;
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalSerialComms(StencilImpl & st,
DoubledGaugeField & U,
const FermionField &in,
FermionField &out,int dag)
{
GRID_TRACE("DhopInternalSerialComms");
Compressor compressor(dag);
int LLs = in.Grid()->_rdimensions[0];
// std::cout << " TwoSpinWilsonFermion3plus1D Halo exch " <<std::endl;
{
GRID_TRACE("HaloExchange");
st.HaloExchangeOpt(in,compressor);
}
// std::cout << " TwoSpinWilsonFermion3plus1D Dhop " <<std::endl;
if (dag == DaggerYes) {
GRID_TRACE("DhopDag");
Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
} else {
GRID_TRACE("Dhop");
Kernels::DhopKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
}
// std::cout << " TwoSpinWilsonFermion3plus1D Done " <<std::endl;
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
{
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.Checkerboard()==Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven,UmuOdd,in,out,dag);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
conformable(in.Grid(),out.Grid()); // drops the cb check
assert(in.Checkerboard()==Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd,UmuEven,in,out,dag);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopComms(const FermionField &in, FermionField &out)
{
int dag =0 ;
conformable(in.Grid(),FermionGrid()); // verifies full grid
conformable(in.Grid(),out.Grid());
out.Checkerboard() = in.Checkerboard();
Compressor compressor(dag);
Stencil.HaloExchangeOpt(in,compressor);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopCalc(const FermionField &in, FermionField &out,uint64_t *ids)
{
conformable(in.Grid(),FermionGrid()); // verifies full grid
conformable(in.Grid(),out.Grid());
out.Checkerboard() = in.Checkerboard();
int LLs = in.Grid()->_rdimensions[0];
Kernels::DhopKernel(Stencil,Umu,Stencil.CommBuf(),LLs,Umu.oSites(),in,out,ids);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{
conformable(in.Grid(),FermionGrid()); // verifies full grid
conformable(in.Grid(),out.Grid());
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil,Umu,in,out,dag);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
{
out.Checkerboard()=in.Checkerboard();
Dhop(in,out,dag); // -0.5 is included
axpy(out,Nd*1.0-M5,in,out);
}
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::Meooe(const FermionField &in, FermionField &out)
{
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
}
}
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
{
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
}
}
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::Mooee(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(Nd*1.0 + M5);
out = scal * in;
}
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
out = (1.0/(Nd*1.0 + M5))*in;
}
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
MooeeInv(in,out);
}
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,441 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
acceleratorSynchronise(); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi);
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
} \
acceleratorSynchronise();
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (!SE->_is_local ) { \
auto chi = coalescedRead(buf[SE->_offset],lane); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
nmu++; \
} \
acceleratorSynchronise();
#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \
if (SE->_is_local ) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
acceleratorSynchronise(); \
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
Recon(result, Uchi);
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::DhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcSpinor chi;
calcSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
GENERIC_STENCIL_LEG(Xp,pauliProjXp,pauliAssign);
GENERIC_STENCIL_LEG(Yp,pauliProjYp,pauliAdd);
GENERIC_STENCIL_LEG(Zp,pauliProjZp,pauliAdd);
GENERIC_STENCIL_LEG(Xm,pauliProjXm,pauliAdd);
GENERIC_STENCIL_LEG(Ym,pauliProjYm,pauliAdd);
GENERIC_STENCIL_LEG(Zm,pauliProjZm,pauliAdd);
coalescedWrite(out[sF],result,lane);
};
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcSpinor chi;
// calcSpinor *chi_p;
calcSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
GENERIC_STENCIL_LEG(Xm,pauliProjXp,pauliAssign);
GENERIC_STENCIL_LEG(Ym,pauliProjYp,pauliAdd);
GENERIC_STENCIL_LEG(Zm,pauliProjZp,pauliAdd);
GENERIC_STENCIL_LEG(Xp,pauliProjXm,pauliAdd);
GENERIC_STENCIL_LEG(Yp,pauliProjYm,pauliAdd);
GENERIC_STENCIL_LEG(Zp,pauliProjZm,pauliAdd);
coalescedWrite(out[sF], result,lane);
};
////////////////////////////////////////////////////////////////////
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcSpinor chi;
// calcSpinor *chi_p;
calcSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_INT(Xp,pauliProjXp,pauliAdd);
GENERIC_STENCIL_LEG_INT(Yp,pauliProjYp,pauliAdd);
GENERIC_STENCIL_LEG_INT(Zp,pauliProjZp,pauliAdd);
GENERIC_STENCIL_LEG_INT(Xm,pauliProjXm,pauliAdd);
GENERIC_STENCIL_LEG_INT(Ym,pauliProjYm,pauliAdd);
GENERIC_STENCIL_LEG_INT(Zm,pauliProjZm,pauliAdd);
coalescedWrite(out[sF], result,lane);
};
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(in[0])) calcSpinor;
const int Nsimd = SiteSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
calcSpinor chi;
// calcSpinor *chi_p;
calcSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
result=Zero();
GENERIC_STENCIL_LEG_INT(Xm,pauliProjXp,pauliAdd);
GENERIC_STENCIL_LEG_INT(Ym,pauliProjYp,pauliAdd);
GENERIC_STENCIL_LEG_INT(Zm,pauliProjZp,pauliAdd);
GENERIC_STENCIL_LEG_INT(Xp,pauliProjXm,pauliAdd);
GENERIC_STENCIL_LEG_INT(Yp,pauliProjYm,pauliAdd);
GENERIC_STENCIL_LEG_INT(Zp,pauliProjZm,pauliAdd);
coalescedWrite(out[sF], result,lane);
};
////////////////////////////////////////////////////////////////////
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(in[0])) calcSpinor;
// calcSpinor *chi_p;
calcSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
const int Nsimd = SiteSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXp,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYp,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZp,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXm,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYm,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZm,pauliAdd);
if ( nmu ) {
auto out_t = coalescedRead(out[sF],lane);
out_t = out_t + result;
coalescedWrite(out[sF],out_t,lane);
}
};
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
typedef decltype(coalescedRead(in[0])) calcSpinor;
// calcSpinor *chi_p;
calcSpinor Uchi;
calcSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
const int Nsimd = SiteSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXp,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYp,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZp,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXm,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYm,pauliAdd);
GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZm,pauliAdd);
if ( nmu ) {
auto out_t = coalescedRead(out[sF],lane);
out_t = out_t + result;
coalescedWrite(out[sF],out_t,lane);
}
};
#define DhopDirMacro(Dir,spProj,spRecon) \
template <class Impl> accelerator_inline \
void TwoSpinWilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF, \
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
{ \
typedef decltype(coalescedRead(in[0])) calcSpinor; \
calcSpinor chi; \
calcSpinor result; \
calcSpinor Uchi; \
StencilEntry *SE; \
int ptype; \
const int Nsimd = SiteSpinor::Nsimd(); \
const int lane=acceleratorSIMTlane(Nsimd); \
\
SE = st.GetEntry(ptype, dir, sF); \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \
coalescedWrite(out[sF], result,lane); \
}
DhopDirMacro(Xp,pauliProjXp,pauliAssign);
DhopDirMacro(Yp,pauliProjYp,pauliAssign);
DhopDirMacro(Zp,pauliProjZp,pauliAssign);
DhopDirMacro(Xm,pauliProjXm,pauliAssign);
DhopDirMacro(Ym,pauliProjYm,pauliAssign);
DhopDirMacro(Zm,pauliProjZm,pauliAssign);
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
typedef decltype(coalescedRead(in[0])) calcSpinor;
calcSpinor chi;
calcSpinor result;
calcSpinor Uchi;
StencilEntry *SE;
int ptype;
const int Nsimd = SiteSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
SE = st.GetEntry(ptype, dir, sF);
GENERIC_DHOPDIR_LEG(Xp,pauliProjXp,pauliAssign);
GENERIC_DHOPDIR_LEG(Yp,pauliProjYp,pauliAssign);
GENERIC_DHOPDIR_LEG(Zp,pauliProjZp,pauliAssign);
GENERIC_DHOPDIR_LEG(Xm,pauliProjXm,pauliAssign);
GENERIC_DHOPDIR_LEG(Ym,pauliProjYm,pauliAssign);
GENERIC_DHOPDIR_LEG(Zm,pauliProjZm,pauliAssign);
coalescedWrite(out[sF], result,lane);
}
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
int Nsite, const FermionField &in, std::vector<FermionField> &out)
{
autoView(U_v ,U,AcceleratorRead);
autoView(in_v ,in,AcceleratorRead);
autoView(st_v ,st,AcceleratorRead);
autoView(out_Xm,out[0],AcceleratorWrite);
autoView(out_Ym,out[1],AcceleratorWrite);
autoView(out_Zm,out[2],AcceleratorWrite);
autoView(out_Xp,out[4],AcceleratorWrite);
autoView(out_Yp,out[5],AcceleratorWrite);
autoView(out_Zp,out[6],AcceleratorWrite);
auto CBp=st.CommBuf();
accelerator_for(sss,Nsite*Ls,Simd::Nsimd(),{
int sU=sss/Ls;
int sF =sss;
DhopDirXm(st_v,U_v,CBp,sF,sU,in_v,out_Xm,0);
DhopDirYm(st_v,U_v,CBp,sF,sU,in_v,out_Ym,1);
DhopDirZm(st_v,U_v,CBp,sF,sU,in_v,out_Zm,2);
DhopDirXp(st_v,U_v,CBp,sF,sU,in_v,out_Xp,3);
DhopDirYp(st_v,U_v,CBp,sF,sU,in_v,out_Yp,4);
DhopDirZp(st_v,U_v,CBp,sF,sU,in_v,out_Zp,5);
});
}
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
{
assert(dirdisp<=5);
assert(dirdisp>=0);
autoView(U_v ,U ,AcceleratorRead);
autoView(in_v ,in ,AcceleratorRead);
autoView(out_v,out,AcceleratorWrite);
autoView(st_v ,st ,AcceleratorRead);
auto CBp=st.CommBuf();
#define LoopBody(Dir) \
case Dir : \
accelerator_for(ss,Nsite,Simd::Nsimd(),{ \
for(int s=0;s<Ls;s++){ \
int sU=ss; \
int sF = s+Ls*sU; \
DhopDir##Dir(st_v,U_v,CBp,sF,sU,in_v,out_v,dirdisp);\
} \
}); \
break;
switch(gamma){
LoopBody(Xp);
LoopBody(Yp);
LoopBody(Zp);
LoopBody(Xm);
LoopBody(Ym);
LoopBody(Zm);
default:
assert(0);
break;
}
#undef LoopBody
}
#define KERNEL_CALLNB(A) \
const uint64_t NN = Nsite*Ls; \
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
int sF = ss; \
int sU = ss/Ls; \
TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \
int sU = sF/Ls; \
TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
}); \
accelerator_barrier();
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
autoView(U_v , U,AcceleratorRead);
autoView(in_v , in,AcceleratorRead);
autoView(out_v,out,AcceleratorWrite);
autoView(st_v , st,AcceleratorRead);
if( interior && exterior ) {
acceleratorFenceComputeStream();
KERNEL_CALL(GenericDhopSite);
return;
} else if( interior ) {
KERNEL_CALLNB(GenericDhopSiteInt);
return;
} else if( exterior ) {
// // dependent on result of merge
acceleratorFenceComputeStream();
KERNEL_CALL_EXT(GenericDhopSiteExt);
return;
}
assert(0 && " Kernel optimisation case not covered ");
}
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior,int exterior)
{
autoView(U_v ,U,AcceleratorRead);
autoView(in_v ,in,AcceleratorRead);
autoView(out_v,out,AcceleratorWrite);
autoView(st_v ,st,AcceleratorRead);
if( interior && exterior ) {
acceleratorFenceComputeStream();
KERNEL_CALL(GenericDhopSiteDag);
return;
} else if( interior ) {
KERNEL_CALLNB(GenericDhopSiteDagInt); return;
} else if( exterior ) {
// Dependent on result of merge
acceleratorFenceComputeStream();
KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;
}
assert(0 && " Kernel optimisation case not covered ");
}
#undef KERNEL_CALLNB
#undef KERNEL_CALL
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,61 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h>
NAMESPACE_BEGIN(Grid);
// S-direction is INNERMOST and takes no part in the parity.
const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::directions (TwoSpinWilsonFermion3plus1DStatic::MakeDirections());
const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::displacements(TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements());
std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDirections (void)
{
std::vector<int> directions(2*Nd);
for(int d=0;d<Nd;d++){
directions[d] = d+1;
directions[d+Nd] = d+1;
}
return directions;
}
std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements(void)
{
std::vector<int> displacements(2*Nd);
for(int d=0;d<Nd;d++){
displacements[d] = +1;
displacements[d+Nd] = -1;
}
return displacements;
}
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,40 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class TwoSpinWilsonFermion3plus1D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,40 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class TwoSpinWilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

220
Grid/qcd/spin/Pauli.h Normal file
View File

@@ -0,0 +1,220 @@
#ifndef GRID_QCD_PAULI_H
#define GRID_QCD_PAULI_H
#include <array>
NAMESPACE_BEGIN(Grid);
//
/*
* Pauli basis
* sx sy sz ident
* (0 1) , (0 -i) , ( 1 0 )
* (1 0) (i 0) ( 0 -1)
*
* These are hermitian.
*
* Also supply wilson "projectors" (1+/-sx), (1+/-sy), (1+/-sz)
*
* spPauliProjXm
* spPauliProjYm etc...
*/
class Pauli {
public:
GRID_SERIALIZABLE_ENUM(Algebra, undef,
SigmaX , 0,
MinusSigmaX , 1,
SigmaY , 2,
MinusSigmaY , 3,
SigmaZ , 4,
MinusSigmaZ , 5,
Identity , 6,
MinusIdentity , 7);
static constexpr unsigned int nPauli = 8;
static const std::array<const char *, nPauli> name;
static const std::array<std::array<Algebra, nPauli>, nPauli> mul;
static const std::array<Algebra, nPauli> adj;
static const std::array<const Pauli, 4> gmu;
static const std::array<const Pauli, 16> gall;
Algebra g;
public:
accelerator Pauli(Algebra initg): g(initg) {}
};
#define CopyImplementation(iTemplate,multPauli,multFlavour) \
template<class vtype> \
accelerator_inline void multPauli(iTemplate<vtype, Nhs> &ret, const iTemplate<vtype, Nhs> &rhs) { \
multFlavour(ret,rhs); \
}
CopyImplementation(iVector,multPauliSigmaX,multFlavourSigmaX);
CopyImplementation(iMatrix,lmultPauliSigmaX,lmultFlavourSigmaX);
CopyImplementation(iMatrix,rmultPauliSigmaX,rmultFlavourSigmaX);
CopyImplementation(iVector,multPauliMinusSigmaX ,multFlavourMinusSigmaX);
CopyImplementation(iMatrix,lmultPauliMinusSigmaX,lmultFlavourMinusSigmaX);
CopyImplementation(iMatrix,rmultPauliMinusSigmaX,rmultFlavourMinusSigmaX);
CopyImplementation(iVector,multPauliSigmaY,multFlavourSigmaY);
CopyImplementation(iMatrix,lmultPauliSigmaY,lmultFlavourSigmaY);
CopyImplementation(iMatrix,rmultPauliSigmaY,rmultFlavourSigmaY);
CopyImplementation(iVector,multPauliMinusSigmaY ,multFlavourMinusSigmaY);
CopyImplementation(iMatrix,lmultPauliMinusSigmaY,lmultFlavourMinusSigmaY);
CopyImplementation(iMatrix,rmultPauliMinusSigmaY,rmultFlavourMinusSigmaY);
CopyImplementation(iVector,multPauliSigmaZ,multFlavourSigmaZ);
CopyImplementation(iMatrix,lmultPauliSigmaZ,lmultFlavourSigmaZ);
CopyImplementation(iMatrix,rmultPauliSigmaZ,rmultFlavourSigmaZ);
CopyImplementation(iVector,multPauliMinusSigmaZ ,multFlavourMinusSigmaZ);
CopyImplementation(iMatrix,lmultPauliMinusSigmaZ,lmultFlavourMinusSigmaZ);
CopyImplementation(iMatrix,rmultPauliMinusSigmaZ,rmultFlavourMinusSigmaZ);
CopyImplementation(iVector,multPauliIdentity,multFlavourIdentity);
CopyImplementation(iMatrix,lmultPauliIdentity,lmultFlavourIdentity);
CopyImplementation(iMatrix,rmultPauliIdentity,rmultFlavourIdentity);
CopyImplementation(iVector,multPauliMinusIdentity ,multFlavourMinusIdentity);
CopyImplementation(iMatrix,lmultPauliMinusIdentity,lmultFlavourMinusIdentity);
CopyImplementation(iMatrix,rmultPauliMinusIdentity,rmultFlavourMinusIdentity);
/*
* sx sy sz ident
* (0 1) , (0 -i) , ( 1 0 )
* (1 0) (i 0) ( 0 -1)
*/
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
hspin(0)=fspin(0)+fspin(1);
hspin(1)=fspin(1)+fspin(0);
}
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
hspin(0)=fspin(0)-fspin(1);
hspin(1)=fspin(1)-fspin(0);
}
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
hspin(0)=fspin(0)-timesI(fspin(1));
hspin(1)=fspin(1)+timesI(fspin(0));
}
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
hspin(0)=fspin(0)+timesI(fspin(1));
hspin(1)=fspin(1)-timesI(fspin(0));
}
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
hspin(0)=fspin(0)+fspin(0);
hspin(1)=Zero();
}
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
hspin(0)=Zero();
hspin(1)=fspin(1)+fspin(1);
}
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliAssign(iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
{
fspin = hspin;
}
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliAdd (iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
{
fspin = fspin + hspin;
}
template<class vtype>
accelerator_inline auto operator*(const Pauli &G, const iVector<vtype, Nhs> &arg)
->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Nhs>, PauliIndex>::value, iVector<vtype, Nhs>>::type
{
iVector<vtype, Nhs> ret;
switch (G.g)
{
case Pauli::Algebra::SigmaX:
multPauliSigmaX(ret, arg); break;
case Pauli::Algebra::MinusSigmaX:
multPauliMinusSigmaX(ret, arg); break;
case Pauli::Algebra::SigmaY:
multPauliSigmaY(ret, arg); break;
case Pauli::Algebra::MinusSigmaY:
multPauliMinusSigmaY(ret, arg); break;
case Pauli::Algebra::SigmaZ:
multPauliSigmaZ(ret, arg); break;
case Pauli::Algebra::MinusSigmaZ:
multPauliMinusSigmaZ(ret, arg); break;
case Pauli::Algebra::Identity:
multPauliIdentity(ret, arg); break;
case Pauli::Algebra::MinusIdentity:
multPauliMinusIdentity(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const Pauli &G, const iMatrix<vtype, Nhs> &arg)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
{
iMatrix<vtype, Nhs> ret;
switch (G.g)
{
case Pauli::Algebra::SigmaX:
lmultPauliSigmaX(ret, arg); break;
case Pauli::Algebra::MinusSigmaX:
lmultPauliMinusSigmaX(ret, arg); break;
case Pauli::Algebra::SigmaY:
lmultPauliSigmaY(ret, arg); break;
case Pauli::Algebra::MinusSigmaY:
lmultPauliMinusSigmaY(ret, arg); break;
case Pauli::Algebra::SigmaZ:
lmultPauliSigmaZ(ret, arg); break;
case Pauli::Algebra::MinusSigmaZ:
lmultPauliMinusSigmaZ(ret, arg); break;
case Pauli::Algebra::Identity:
lmultPauliIdentity(ret, arg); break;
case Pauli::Algebra::MinusIdentity:
lmultPauliMinusIdentity(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const iMatrix<vtype, Nhs> &arg, const Pauli &G)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
{
iMatrix<vtype, Nhs> ret;
switch (G.g)
{
case Pauli::Algebra::SigmaX:
rmultPauliSigmaX(ret, arg); break;
case Pauli::Algebra::MinusSigmaX:
rmultPauliMinusSigmaX(ret, arg); break;
case Pauli::Algebra::SigmaY:
rmultPauliSigmaY(ret, arg); break;
case Pauli::Algebra::MinusSigmaY:
rmultPauliMinusSigmaY(ret, arg); break;
case Pauli::Algebra::SigmaZ:
rmultPauliSigmaZ(ret, arg); break;
case Pauli::Algebra::MinusSigmaZ:
rmultPauliMinusSigmaZ(ret, arg); break;
case Pauli::Algebra::Identity:
rmultPauliIdentity(ret, arg); break;
case Pauli::Algebra::MinusIdentity:
rmultPauliMinusIdentity(ret, arg); break;
default: assert(0);
}
return ret;
}
NAMESPACE_END(Grid);
#endif // GRID_QCD_GAMMA_H

5
TODO
View File

@@ -1,3 +1,8 @@
* Clean up the extract merge and replace with insertLane/extractLane
-----
i) Refine subspace with HDCG & recompute i) Refine subspace with HDCG & recompute
ii) Block Lanczos in coarse space ii) Block Lanczos in coarse space
iii) Batched block project in the operator computation iii) Batched block project in the operator computation

View File

@@ -0,0 +1,93 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_icosahedron.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
const int MyNd=3;
template<typename vtype> using iIcosahedralLorentzComplex = iVector<iScalar<iScalar<vtype> >, MyNd+1 > ;
typedef iIcosahedralLorentzComplex<ComplexD > IcosahedralLorentzComplexD;
typedef iIcosahedralLorentzComplex<vComplexD> vIcosahedralLorentzComplexD;
typedef Lattice<vIcosahedralLorentzComplexD> LatticeIcosahedralLorentzComplexD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
const int L=8;
const int Npatch = num_icosahedron_tiles;
// Put SIMD all in time direction
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout({1,1,vComplexD::Nsimd(),1});
Coordinate mpi_layout = GridDefaultMpi();
std::cout << GridLogMessage << " mpi "<<mpi_layout<<std::endl;
std::cout << GridLogMessage << " simd "<<simd_layout<<std::endl;
std::cout << GridLogMessage << " latt "<<latt_size<<std::endl;
GridCartesianCrossIcosahedron EdgeGrid(latt_size,simd_layout,mpi_layout,IcosahedralEdges);
std::cout << GridLogMessage << " Created edge grid "<<std::endl;
GridCartesianCrossIcosahedron VertexGrid(latt_size,simd_layout,mpi_layout,IcosahedralVertices);
std::cout << GridLogMessage << " Created vertex grid "<<std::endl;
LatticeIcosahedralLorentzComplexD Umu(&EdgeGrid);
LatticeComplex Phi(&VertexGrid);
std::cout << GridLogMessage << " Created two fields "<<std::endl;
Phi = Zero();
Umu = Zero();
std::cout << GridLogMessage << " Zeroed two fields "<<std::endl;
ComplexD one (1.0);
Phi = one;
Umu = one;
std::cout << GridLogMessage << " V = "<<norm2(Phi)<<std::endl;
std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10+2*latt_size[2]<<std::endl;
std::cout << GridLogMessage << " E = "<<norm2(Umu)<<std::endl;
std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10*4<<std::endl;
// std::cout << " Umu "<<Umu<<std::endl;
// std::cout << " Phi "<<Phi<<std::endl;
LatticePole(Phi,South);
std::cout << " Phi South Pole set\n"<<Phi<<std::endl;
LatticePole(Phi,North);
std::cout << " Phi North Pole set\n"<<Phi<<std::endl;
for(int mu=0;mu<VertexGrid._ndimension;mu++){
std::cout << " Calling lattice coordinate mu="<<mu<<std::endl;
LatticeCoordinate(Phi,mu);
std::cout << " Phi coor mu="<<mu<<"\n"<<Phi<<std::endl;
}
Grid_finalize();
}