1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-13 12:44:42 +01:00

Beginnings of S2xR

This commit is contained in:
Peter Boyle
2025-10-07 16:11:06 -04:00
parent 35e10a1159
commit 85b2bd4c93
19 changed files with 2324 additions and 17 deletions

View File

@@ -31,5 +31,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/cartesian/Cartesian_base.h>
#include <Grid/cartesian/Cartesian_full.h>
#include <Grid/cartesian/Cartesian_red_black.h>
#include <Grid/cartesian/CartesianCrossIcosahedron.h>
#endif

View File

@@ -0,0 +1,197 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/cartesian/CartesianCrossIcosahedron.h
Copyright (C) 2025
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////
// Grid Support.
/////////////////////////////////////////////////////////////////////////////////////////
// Selects which elements of the icosahedral mesh a grid carries data on.
// The vertex variant is the one that gets extra pole storage.
enum IcosahedralMeshType {
  IcosahedralVertices = 0,
  IcosahedralEdges    = 1
};
// Pole selector. The numeric values are fixed: South is 0, North is 1.
enum NorthSouth {
  South = 0,
  North = 1
};
// Required global extent of the last ("patch") dimension of an icosahedral grid.
constexpr int num_icosahedron_tiles = 10;
// Cartesian grid whose first two dimensions (x,y) index an LxL patch and whose last
// dimension indexes the num_icosahedron_tiles patches. For the vertex mesh type the
// grid additionally reserves outer-site storage for the north and south poles.
class GridCartesianCrossIcosahedron: public GridCartesian {
public:

  // Which mesh elements this grid stores data on; assigned in the constructor.
  IcosahedralMeshType meshType;
  IcosahedralMeshType MeshType(void) { return meshType; };

  /////////////////////////////////////////////////////////////////////////
  // Constructor takes a parent grid and possibly subdivides communicator.
  /////////////////////////////////////////////////////////////////////////
  /*
  GridCartesian(const Coordinate &dimensions,
		const Coordinate &simd_layout,
		const Coordinate &processor_grid,
		const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
  {
    assert(0); // No subdivision
  }
  GridCartesian(const Coordinate &dimensions,
		const Coordinate &simd_layout,
		const Coordinate &processor_grid,
		const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
  {
    assert(0); // No subdivision
  }
  */

  /////////////////////////////////////////////////////////////////////////
  // Construct from comm world
  //
  //  dimensions     : global extents; the last one must equal num_icosahedron_tiles
  //  simd_layout    : SIMD decomposition; must be 1 in dimensions 0 and 1
  //  processor_grid : rank decomposition; at most 2 in the last dimension
  //  _meshType      : vertex or edge mesh; only the vertex mesh gets pole storage
  /////////////////////////////////////////////////////////////////////////
  GridCartesianCrossIcosahedron(const Coordinate &dimensions,
				const Coordinate &simd_layout,
				const Coordinate &processor_grid,
				IcosahedralMeshType _meshType) : GridCartesian(dimensions,simd_layout,processor_grid)
  {
    meshType = _meshType;

    assert(simd_layout[0]==1); // Force simd into perpendicular dimensions
    assert(simd_layout[1]==1); // to avoid pole storage complexity interacting with SIMD.
    assert(dimensions[_ndimension-1]==num_icosahedron_tiles);
    assert(processor_grid[_ndimension-1]<=2); // Keeps the patches that need a pole on the same node

    // allocate the pole storage if we are seeking vertex domain data
    if ( meshType == IcosahedralVertices ) {
      InitPoles();
    }
  }

  virtual ~GridCartesianCrossIcosahedron() = default;

  ////////////////////////////////////////////////
  // Use to decide if a given grid is icosahedral
  ////////////////////////////////////////////////
  // Pole bookkeeping. InitPoles() is only run for the vertex mesh, so these are
  // zero-initialised here; otherwise the accessors below would return
  // indeterminate values on an edge-mesh grid.
  int hasNorthPole    = 0;  // 1 if this rank stores the north pole
  int hasSouthPole    = 0;  // 1 if this rank stores the south pole
  int northPoleOsite  = 0;  // first outer site of the north pole storage
  int southPoleOsite  = 0;  // first outer site of the south pole storage
  int northPoleOsites = 0;  // number of outer sites holding north pole data
  int southPoleOsites = 0;  // number of outer sites holding south pole data

  virtual int Icosahedral(void) override { return 1;}
  virtual int ownsNorthPole(void)   const override { return hasNorthPole; };
  virtual int NorthPoleOsite(void)  const override { return northPoleOsite; };
  virtual int NorthPoleOsites(void) const override { return northPoleOsites; };
  virtual int ownsSouthPole(void)   const override { return hasSouthPole; };
  virtual int SouthPoleOsite(void)  const override { return southPoleOsite; };
  virtual int SouthPoleOsites(void) const override { return southPoleOsites; };

  // Grows the site counts to make room for the two poles and records, for this
  // rank, whether it owns each pole and where in the outer-site range it lives.
  void InitPoles(void)
  {
    int Ndm1 = _ndimension-1;
    ///////////////////////
    // Add the extra pole storage
    ///////////////////////
    // Vertices = 1x LxLx D1...Dn + 2.D1...Dn
    // Start after the LxL and don't include the 10 patch dim
    int OrthogSize = 1;
    for (int d = 2; d < Ndm1; d++) {
      OrthogSize *= _gdimensions[d];
    }
    _fsites += OrthogSize*2;
    _gsites += OrthogSize*2;

    // Simd reduced sizes are multiplied up.
    // If the leading LxL are simd-ized, the vector objects will contain "redundant" lanes
    // which should contain identical north (south) pole data
    OrthogSize = 1;
    for (int d = 2; d < Ndm1; d++) {
      OrthogSize *= _rdimensions[d];
    }

    // Grow the local volume to hold pole data
    // on rank (0,0) in the LxL planes
    // since SIMD must be placed in the orthogonal directions
    Coordinate pcoor = this->ThisProcessorCoor();
    Coordinate pgrid = this->ProcessorGrid();

    const int xdim=0;
    const int ydim=1;
    /*
     *
     *    /\/\/\/\/\
     *   /\/\/\/\/\/
     *   \/\/\/\/\/
     *
     *      y
     *     /
     *     \x
     *
     * Labelling patches as 5 6 7 8 9
     *                     0 1 2 3 4
     *
     * Will ban distribution of the patch dimension by more than 2.
     *
     * Hence all 5 patches associated with the pole must have the
     * appropriate "corner" of the patch L^2 located on the SAME rank.
     */

    // South pole: last rank in x, first in y, first in the patch dimension.
    if( (pcoor[xdim]==pgrid[xdim]-1) && (pcoor[ydim]==0) && (pcoor[Ndm1]==0) ){
      hasSouthPole   =1;
      southPoleOsite =this->_osites;
      southPoleOsites=OrthogSize;
      this->_osites += OrthogSize;
    } else {
      hasSouthPole   =0;
      southPoleOsites=0;
      southPoleOsite =0;
    }

    // North pole: first rank in x, last in y, last in the patch dimension.
    // When one rank owns both, the north pole is stored after the south pole.
    if( (pcoor[xdim]==0) && (pcoor[ydim]==pgrid[ydim]-1) && (pcoor[Ndm1]==pgrid[Ndm1]-1) ){
      hasNorthPole   =1;
      northPoleOsite =this->_osites;
      northPoleOsites=OrthogSize;
      this->_osites += OrthogSize;
    } else {
      hasNorthPole   =0;
      northPoleOsites=0;
      northPoleOsite =0;
    }

    std::cout << "Icosahedral vertex field volume " << this->_osites<<std::endl;
    std::cout << "Icosahedral south pole offset " << this->southPoleOsite<<std::endl;
    std::cout << "Icosahedral north pole offset " << this->northPoleOsite<<std::endl;
    std::cout << "Icosahedral south pole size " << this->southPoleOsites<<std::endl;
    std::cout << "Icosahedral north pole size " << this->northPoleOsites<<std::endl;
  };
};
NAMESPACE_END(Grid);

View File

@@ -86,11 +86,20 @@ public:
public:
// Icosahedral decisions
virtual int Icosahedral(void) { return 0;}
// Pole queries. These base-class defaults all return 0.
// GridCartesianCrossIcosahedron overrides every one of them.
// ownsNorthPole / ownsSouthPole : nonzero if this rank stores that pole.
virtual int ownsNorthPole(void) const { return 0; };
virtual int ownsSouthPole(void) const { return 0; };
// NorthPoleOsite / SouthPoleOsite : outer-site offset of that pole's storage.
virtual int NorthPoleOsite(void) const { return 0; };
virtual int SouthPoleOsite(void) const { return 0; };
// NorthPoleOsites / SouthPoleOsites : number of outer sites used by that pole.
// NOTE(review): these two defaults print "base osites" to std::cout on every call;
// this looks like a debugging aid for spotting calls on a non-icosahedral grid - confirm
// whether it should remain.
virtual int NorthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
virtual int SouthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
////////////////////////////////////////////////////////////////
// Checkerboarding interface is virtual and overridden by
// GridCartesian / GridRedBlackCartesian
////////////////////////////////////////////////////////////////
virtual int Icosahedral(void) { return 0;}
virtual int CheckerBoarded(int dim) =0;
virtual int CheckerBoard(const Coordinate &site)=0;
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
@@ -177,6 +186,8 @@ public:
}
return permute_type;
}
////////////////////////////////////////////////////////////////
// Array sizing queries
////////////////////////////////////////////////////////////////

View File

@@ -34,6 +34,8 @@ NAMESPACE_BEGIN(Grid);
const int Cshift_verbose=0;
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
{
assert(!rhs.Grid()->Icosahedral());
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;

View File

@@ -30,6 +30,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
{
assert(!rhs.Grid()->Icosahedral());
Lattice<vobj> ret(rhs.Grid());
ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
Cshift_local(ret,rhs,dimension,shift);

View File

@@ -373,14 +373,17 @@ public:
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
typedef typename vobj::scalar_object sobj;
for(int64_t g=0;g<o.Grid()->_gsites;g++){
uint64_t gsites=1;
uint64_t polesites=0;
for(int d=0;d<o.Grid()->_ndimension;d++) gsites *= o.Grid()->_gdimensions[d];
for(int64_t g=0;g<gsites;g++){
Coordinate gcoor;
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
sobj ss;
peekSite(ss,o,gcoor);
stream<<"[";
stream<<"["<< g<<" : ";
for(int d=0;d<gcoor.size();d++){
stream<<gcoor[d];
if(d!=gcoor.size()-1) stream<<",";
@@ -388,6 +391,41 @@ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Latti
stream<<"]\t";
stream<<ss<<std::endl;
}
if ( o.Grid()->Icosahedral() ) {
uint64_t psites=1;
Coordinate perpdims;
for(int d=2;d<o.Grid()->_ndimension-1;d++){
int pd=o.Grid()->_gdimensions[d];
psites*=pd;
perpdims.push_back(pd);
}
for(uint64_t p=0;p<psites;p++){
sobj ss;
Coordinate orthog;
Lexicographic::CoorFromIndex(orthog,p,perpdims);
peekPole(ss,o,orthog,South);
stream<<"[ SouthPole : ";
for(int d=0;d<orthog.size();d++){
stream<<orthog[d];
if(d!=orthog.size()-1) stream<<",";
}
stream<<"]\t";
stream<<ss<<std::endl;
}
for(uint64_t p=0;p<psites;p++){
sobj ss;
Coordinate orthog;
Lexicographic::CoorFromIndex(orthog,p,perpdims);
peekPole(ss,o,orthog,North);
stream<<"[ NorthPole : ";
for(int d=0;d<orthog.size();d++){
stream<<orthog[d];
if(d!=orthog.size()-1) stream<<",";
}
stream<<"]\t";
stream<<ss<<std::endl;
}
}
return stream;
}

View File

@@ -34,11 +34,18 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
typedef typename iobj::scalar_type scalar_type;
typedef typename iobj::vector_type vector_type;
l=Zero();
GridBase *grid = l.Grid();
int Nsimd = grid->iSites();
int cartesian_vol = grid->oSites();
if ( grid->Icosahedral() ) {
cartesian_vol = cartesian_vol - grid->NorthPoleOsites()-grid->SouthPoleOsites();
}
{
autoView(l_v, l, CpuWrite);
thread_for( o, grid->oSites(), {
thread_for( o, cartesian_vol, {
vector_type vI;
Coordinate gcoor;
ExtractBuffer<scalar_type> mergebuf(Nsimd);
@@ -49,7 +56,62 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
merge<vector_type,scalar_type>(vI,mergebuf);
l_v[o]=vI;
});
}
if (grid->Icosahedral()) {
uint64_t psites=1;
Coordinate perpdims;
typename iobj::scalar_object ss;
for(int d=2;d<grid->_ndimension-1;d++){
int pd=grid->_gdimensions[d];
psites*=pd;
perpdims.push_back(pd);
}
for(uint64_t p=0;p<psites;p++){
Coordinate orthog;
Lexicographic::CoorFromIndex(orthog,p,perpdims);
int icoor;
if ( mu>=2 && mu < grid->_ndimension-1) {
icoor = orthog[mu-2];
} else {
icoor = -1;
}
ss=scalar_type(icoor);
pokePole(ss,l,orthog,South);
pokePole(ss,l,orthog,North);
}
}
};
// Zero the lattice, then, on an icosahedral grid, write the value one into every
// entry of the requested pole (one entry per point of the orthogonal dimensions,
// i.e. dimensions 2 .. _ndimension-2). On any other grid the result is all zero.
template<class iobj> inline void LatticePole(Lattice<iobj> &l,NorthSouth pole)
{
  typedef typename iobj::scalar_object sobj;
  typedef typename iobj::scalar_type scalar_type;
  typedef typename iobj::vector_type vector_type;

  GridBase *grid = l.Grid();
  l=Zero();

  if ( !grid->Icosahedral() ) return;

  // Scalar site object carrying the value one.
  sobj unit;
  scalar_type one(1.0);
  unit=one;

  // Collect the global extents of the orthogonal dimensions and their product.
  Coordinate orthogDims;
  uint64_t   nOrthog=1;
  const int  patchDim = grid->_ndimension-1;
  for(int d=2;d<patchDim;d++){
    int extent=grid->_gdimensions[d];
    nOrthog*=extent;
    orthogDims.push_back(extent);
  }

  // Visit every orthogonal coordinate and poke the pole entry there.
  for(uint64_t idx=0;idx<nOrthog;idx++){
    Coordinate orthog;
    Lexicographic::CoorFromIndex(orthog,idx,orthogDims);
    pokePole(unit,l,orthog,pole);
  }
}
NAMESPACE_END(Grid);

View File

@@ -141,7 +141,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
ExtractBuffer<sobj> buf(Nsimd);
autoView( l_v , l, CpuWrite);
autoView( l_v , l, CpuRead);
extract(l_v[odx],buf);
s = buf[idx];
@@ -151,6 +151,134 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
return;
};
// zero for south pole, one for north pole
template<class vobj,class sobj>
void peekPole(sobj &s,const Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
{
s=Zero();
GridBase *grid=l.Grid();
assert(grid->Icosahedral());
int Nsimd = grid->Nsimd();
int rank;
int Ndm1 = grid->_ndimension-1;
Coordinate pgrid = grid->ProcessorGrid();
const int xdim=0;
const int ydim=1;
const int pdim=Ndm1;
int64_t pole_osite;
int64_t pole_isite;
Coordinate rdims;
Coordinate idims;
Coordinate ocoor;
Coordinate icoor;
Coordinate pcoor(grid->_ndimension);
for(int d=2;d<Ndm1;d++){
int dd=d-2;
rdims.push_back(grid->_rdimensions[d]);
idims.push_back(grid->_simd_layout[d]);
icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
pcoor[d] = orthog[dd]/grid->_ldimensions[d];
}
Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
int64_t osite;
if(isNorth == North){
pcoor[xdim] = 0;
pcoor[ydim] = pgrid[ydim]-1;
pcoor[Ndm1] = pgrid[Ndm1]-1;
osite = pole_osite + grid->NorthPoleOsite();
} else {
pcoor[xdim] = pgrid[xdim]-1;
pcoor[ydim] = 0;
pcoor[Ndm1] = 0;
osite = pole_osite + grid->SouthPoleOsite();
}
rank = grid->RankFromProcessorCoor(pcoor);
if ( rank == grid->ThisRank() ) {
ExtractBuffer<sobj> buf(Nsimd);
autoView( l_v , l, CpuWrite);
extract(l_v[osite],buf);
s = buf[pole_isite];
}
grid->Broadcast(rank,s);
return;
};
//////////////////////////////////////////////////////////
// Write one pole entry of an icosahedral lattice.
//
//  s       : scalar object to store; it is first broadcast from the boss rank
//  l       : lattice to modify; its grid must report Icosahedral()
//  orthog  : global coordinate in the orthogonal dimensions, orthog[d-2] for
//            grid dimension d in 2 .. _ndimension-2
//  isNorth : North (1) selects the north pole, South (0) the south pole
//
// Every rank must call this (it starts with a Broadcast); only the rank that
// owns the selected pole modifies its local data.
//////////////////////////////////////////////////////////
template<class vobj,class sobj>
void pokePole(const sobj &s,Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
{
  GridBase *grid=l.Grid();

  assert(grid->Icosahedral());

  grid->Broadcast(grid->BossRank(),s);

  int Nsimd = grid->Nsimd();

  int rank;

  int Ndm1 = grid->_ndimension-1;
  Coordinate pgrid = grid->ProcessorGrid();
  const int xdim=0;
  const int ydim=1;

  int64_t pole_osite;
  int64_t pole_isite;
  Coordinate rdims;
  Coordinate idims;
  Coordinate ocoor;
  Coordinate icoor;
  Coordinate pcoor(grid->_ndimension,0);

  // Split each orthogonal coordinate into processor, SIMD-lane and outer parts.
  for(int d=2;d<Ndm1;d++){
    int dd = d-2;
    rdims.push_back(grid->_rdimensions[d]);
    idims.push_back(grid->_simd_layout[d]);
    icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
    ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
    pcoor[d] = orthog[dd]/grid->_ldimensions[d];
  }
  Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
  Lexicographic::IndexFromCoor(icoor,pole_isite,idims);

  // Pick the processor corner that owns the pole and offset into its pole storage.
  int64_t osite;
  if(isNorth ==North){
    pcoor[xdim] = 0;
    pcoor[ydim] = pgrid[ydim]-1;
    pcoor[Ndm1] = pgrid[Ndm1]-1;
    osite = pole_osite + grid->NorthPoleOsite();
  } else {
    pcoor[xdim] = pgrid[xdim]-1;
    pcoor[ydim] = 0;
    pcoor[Ndm1] = 0;
    osite = pole_osite + grid->SouthPoleOsite();
  }

  rank = grid->RankFromProcessorCoor(pcoor);

  // extract-modify-merge cycle is easiest way and this is not perf critical
  if ( rank == grid->ThisRank() ) {
    ExtractBuffer<sobj> buf(Nsimd);
    autoView( l_v , l, CpuWrite);
    extract(l_v[osite],buf);
    buf[pole_isite] = s;
    merge(l_v[osite],buf);
  }

  return;
};
//////////////////////////////////////////////////////////
// Peek a scalar object from the SIMD array
//////////////////////////////////////////////////////////
@@ -179,7 +307,7 @@ inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
for(int w=0;w<words;w++){
pt[w] = getlane(vp[w],idx);
}
// std::cout << "peekLocalSite "<<site<<" "<<odx<<","<<idx<<" "<<s<<std::endl;
return;
};
template<class vobj,class sobj>

View File

@@ -0,0 +1,175 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
// Non-template holder for the stencil description shared by every
// TwoSpinWilsonFermion3plus1D<Impl> instantiation.
class TwoSpinWilsonFermion3plus1DStatic {
public:
  // Number of stencil points.
  static constexpr int npoint = 6;

  // S-direction is INNERMOST and takes no part in the parity.
  static const std::vector<int> directions;
  static const std::vector<int> displacements;

  // Builders for the two tables above (declared only; defined elsewhere).
  static std::vector<int> MakeDirections();
  static std::vector<int> MakeDisplacements();
};
// Two-spin Wilson fermion operator declared over a pair of grids: a "three dim"
// grid returned as the gauge grid and a "four dim" grid returned as the fermion
// grid (each with a red-black partner). Only declarations live here, apart from
// the few members given inline bodies below.
template<class Impl>
class TwoSpinWilsonFermion3plus1D : public TwoSpinWilsonKernels<Impl>, public TwoSpinWilsonFermion3plus1DStatic
{
public:
INHERIT_IMPL_TYPES(Impl);
typedef TwoSpinWilsonKernels<Impl> Kernels;
// Scratch fermion field exposed through tmp().
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
// NOTE(review): Dirichlet / Block are not used by anything visible in this header;
// presumably Dirichlet-boundary settings - confirm against the implementation.
int Dirichlet;
Coordinate Block;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
// Gauge fields live on the three-dim grids, fermion fields on the four-dim grids.
GridBase *GaugeGrid(void) { return _ThreeDimGrid ;}
GridBase *GaugeRedBlackGrid(void) { return _ThreeDimRedBlackGrid ;}
GridBase *FermionGrid(void) { return _FourDimGrid;}
GridBase *FermionRedBlackGrid(void) { return _FourDimRedBlackGrid;}
// full checkerboard operations; not implemented here - calling them trips assert(0)
virtual void M (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdag (const FermionField &in, FermionField &out){assert(0);};
// half checkerboard operations; declared only, no definitions in this header
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// Directional operators; not implemented here - calling them trips assert(0)
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
void DW (const FermionField &in, FermionField &out,int dag);
void Dhop (const FermionField &in, FermionField &out,int dag);
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
void DhopComms (const FermionField &in, FermionField &out);
void DhopCalc (const FermionField &in, FermionField &out,uint64_t *ids);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
void DhopDirComms(const FermionField &in);
void DhopDirCalc(const FermionField &in, FermionField &out,int point);
///////////////////////////////////////////////////////////////
// New methods added
///////////////////////////////////////////////////////////////
// Internal workers; each takes the stencil and doubled gauge field to operate with.
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalOverlappedComms(StencilImpl & st,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
void DhopInternalSerialComms(StencilImpl & st,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out,
int dag);
// Constructors
// Takes the gauge field, the four grids described above, the M5 parameter and
// optional implementation parameters.
TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
GridCartesian &ThreeDimGrid,
GridRedBlackCartesian &ThreeDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
// Currently a no-op: the block argument is ignored.
virtual void DirichletBlock(const Coordinate & block)
{
}
// DoubleStore
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
public:
// Add these to the support from Wilson
GridBase *_ThreeDimGrid;
GridBase *_ThreeDimRedBlackGrid;
GridBase *_FourDimGrid;
GridBase *_FourDimRedBlackGrid;
double M5;
int Ls;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
};
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,222 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////
// Single flavour two-component (Nhs) spinors with colour index
/////////////////////////////////////////////////////////////////////////////
// Implementation policy for two-spin Wilson fermions with periodic gauge fields.
// Supplies the site/field typedefs, the link multiplication primitives, the
// double-stored gauge field construction and the force insertion helpers.
//
//  S              : SIMD scalar type the fields are built from
//  Representation : gauge group representation; fixes the colour Dimension
//  Options        : supplies the coefficient type _Coeff_t
template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
class TwoSpinWilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:

  static const int Dimension = Representation::Dimension;
  static const bool isFundamental = Representation::isFundamental;
  typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
  INHERIT_GIMPL_TYPES(Gimpl);

  //Necessary?
  constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}

  typedef typename Options::_Coeff_t Coeff_t;

  // Site tensor layouts. Every spinor flavour here has Nhs spin components, so
  // the "half" and "comm" spinors have the same shape as the full spinor.
  template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
  template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Nhs> >;
  template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
  template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
  template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;

  typedef iImplSpinor<Simd> SiteSpinor;
  typedef iImplPropagator<Simd> SitePropagator;
  typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
  typedef iImplHalfCommSpinor<Simd> SiteHalfCommSpinor;
  typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;

  typedef Lattice<SiteSpinor> FermionField;
  typedef Lattice<SitePropagator> PropagatorField;
  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;

  typedef SimpleCompressor<SiteSpinor> Compressor;
  typedef WilsonImplParams ImplParams;
  typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
  typedef const typename StencilImpl::View_type StencilView;

  ImplParams Params;

  TwoSpinWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
  };

  // phi = U(mu) * chi for a single site.
  template<class _Spinor>
  static accelerator_inline void multLink(_Spinor &phi,
					  const SiteDoubledGaugeField &U,
					  const _Spinor &chi,
					  int mu)
  {
    auto UU = coalescedRead(U(mu));
    mult(&phi(), &UU, &chi());
  }

  // Same as above; the stencil entry and view arguments are ignored.
  template<class _Spinor>
  static accelerator_inline void multLink(_Spinor &phi,
					  const SiteDoubledGaugeField &U,
					  const _Spinor &chi,
					  int mu,
					  StencilEntry *SE,
					  StencilView &St)
  {
    multLink(phi,U,chi,mu);
  }

  // Field-wide version: out[s] = Umu[s](mu) * phi[s] for every outer site of out's grid.
  template<class _SpinorField>
  inline void multLinkField(_SpinorField & out,
			    const DoubledGaugeField &Umu,
			    const _SpinorField & phi,
			    int mu)
  {
    const int Nsimd = SiteHalfSpinor::Nsimd();
    autoView( out_v, out, AcceleratorWrite);
    autoView( phi_v, phi, AcceleratorRead);
    autoView( Umu_v, Umu, AcceleratorRead);
    typedef decltype(coalescedRead(out_v[0])) calcSpinor;
    accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
      calcSpinor tmp;
      multLink(tmp,Umu_v[sss],phi_v(sss),mu);
      coalescedWrite(out_v[sss],tmp);
    });
  }

  template <class ref>
  static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
  {
    reg = memory;
  }

  // Build the double-stored gauge field Uds from Umu: for each mu the forward
  // link goes to index mu and the adjoint of the link shifted by -1 goes to
  // index mu+Nd, with twists and boundary phases applied as configured in Params.
  inline void DoubleStore(GridBase *GaugeGrid,
			  DoubledGaugeField &Uds,
			  const GaugeField &Umu)
  {
    typedef typename Simd::scalar_type scalar_type;

    conformable(Uds.Grid(), GaugeGrid);
    conformable(Umu.Grid(), GaugeGrid);

    GaugeLinkField U(GaugeGrid);
    GaugeLinkField tmp(GaugeGrid);

    Lattice<iScalar<vInteger> > coor(GaugeGrid);
    ////////////////////////////////////////////////////
    // apply any boundary phase or twists
    ////////////////////////////////////////////////////
    for (int mu = 0; mu < Nd; mu++) {

      ////////// boundary phase /////////////
      auto pha = Params.boundary_phases[mu];
      scalar_type phase( real(pha),imag(pha) );

      int L = GaugeGrid->GlobalDimensions()[mu];
      int Lmu = L - 1;

      LatticeCoordinate(coor, mu);

      U = PeekIndex<LorentzIndex>(Umu, mu);

      // apply any twists
      RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
      if ( theta != 0.0) {
	scalar_type twphase(::cos(theta),::sin(theta));
	U = twphase*U;
	std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
      }

      // Forward link: phase applied on the last slice in direction mu.
      tmp = where(coor == Lmu, phase * U, U);
      PokeIndex<LorentzIndex>(Uds, tmp, mu);

      // Backward link: conjugate phase applied on the first slice in direction mu.
      U = adj(Cshift(U, mu, -1));
      U = where(coor == 0, conjugate(phase) * U, U);
      PokeIndex<LorentzIndex>(Uds, U, mu + Nd);
    }
  }

  // mat(mu) = spin trace of the outer product Btilde x A.
  inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
    GaugeLinkField link(mat.Grid());
    link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
    PokeIndex<LorentzIndex>(mat,link,mu);
  }

  inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){
    mat = outerProduct(B,A);
  }

  inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
    mat = TraceIndex<SpinIndex>(P);
  }

  // Copies the first Nd components of Uds into mat[0..Nd-1]; mat must already hold Nd fields.
  inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
  {
    for (int mu = 0; mu < Nd; mu++)
      mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
  }

  // mat(mu) at gauge site sU = sum over s in [0,Ls) and over spin of the colour
  // outer product of Btilde and Atilde at fermion site s+Ls*sU, where Ls is the
  // extent of dimension 0 of Btilde's grid.
  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu)
  {
    int Ls=Btilde.Grid()->_fdimensions[0];
    autoView( mat_v , mat, AcceleratorWrite);
    {
      const int Nsimd = SiteSpinor::Nsimd();
      autoView( Btilde_v , Btilde, AcceleratorRead);
      autoView( Atilde_v , Atilde, AcceleratorRead);
      accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
	  int sU=sss;
	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
	  ColorMatrixType sum;
	  zeroit(sum);
	  for(int s=0;s<Ls;s++){
	    int sF = s+Ls*sU;
	    // SiteSpinor holds Nhs spin components; looping to Ns would index past the end.
	    for(int spn=0;spn<Nhs;spn++){ //sum over spin
	      auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
	      auto aa = coalescedRead(Atilde_v[sF]()(spn) );
	      auto op = outerProduct(bb,aa);
	      sum = sum + op;
	    }
	  }
	  coalescedWrite(mat_v[sU](mu)(), sum);
      });
    }
  }
};
// Concrete precision variants, all in the fundamental representation with real coefficients.
using TwoSpinWilsonImplR  = TwoSpinWilsonImpl<vComplex,   FundamentalRepresentation, CoeffReal >; // built on vComplex
using TwoSpinWilsonImplF  = TwoSpinWilsonImpl<vComplexF,  FundamentalRepresentation, CoeffReal >; // built on vComplexF
using TwoSpinWilsonImplD  = TwoSpinWilsonImpl<vComplexD,  FundamentalRepresentation, CoeffReal >; // built on vComplexD
using TwoSpinWilsonImplD2 = TwoSpinWilsonImpl<vComplexD2, FundamentalRepresentation, CoeffReal >; // built on vComplexD2
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,84 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Static kernel entry points for the two-spin Wilson hopping term.
// This class derives from FermionOperator<Impl> and is the base class of
// TwoSpinWilsonFermion3plus1D<Impl>. Only declarations appear here.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Impl> class TwoSpinWilsonKernels : public FermionOperator<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
typedef AcceleratorVector<int,STENCIL_MAX> StencilVector;
public:
// Hopping-term kernel over Nsite sites with Ls slices per site; the interior and
// exterior flags default to 1 (both enabled).
static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
// Overload taking a caller-supplied id array instead of the interior/exterior flags.
// NOTE(review): the meaning of ids is not visible in this header - confirm in the implementation.
static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
uint64_t *ids);
// Daggered counterpart of the first DhopKernel overload.
static void DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
// Per-direction variants: DhopDirAll writes one output field per entry of out,
// DhopDirKernel handles the single direction/displacement selected by dirdisp.
static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
private:
// Single-site workers operating on views; sF indexes the fermion site and sU the gauge site.
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
// One worker per direction (X, Y, Z) and sign (p / m).
static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
public:
// Forwards the implementation parameters to the FermionOperator base.
TwoSpinWilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
};
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,486 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
#include <Grid/perfmon/PerfCount.h>
NAMESPACE_BEGIN(Grid);
// 5d lattice for DWF.
template<class Impl>
TwoSpinWilsonFermion3plus15D<Impl>::TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
GridCartesian &ThreeDimGrid,
GridRedBlackCartesian &ThreeDimRedBlackGrid,
RealD _M5,const ImplParams &p) :
Kernels(p),
_FourDimGrid (&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
_ThreeDimGrid (&ThreeDimGrid),
_ThreeDimRedBlackGrid(&ThreeDimRedBlackGrid),
Stencil (_FourDimGrid,npoint,Even,directions,displacements,p),
StencilEven(_FourDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
StencilOdd (_FourDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
M5(_M5),
Umu(_ThreeDimGrid),
UmuEven(_ThreeDimRedBlackGrid),
UmuOdd (_ThreeDimRedBlackGrid),
_tmp(&FourDimRedBlackGrid),
Dirichlet(0)
{
// some assertions
assert(FourDimGrid._ndimension==Nd+1);
assert(ThreeDimGrid._ndimension==Nd);
assert(ThreeDimRedBlackGrid._ndimension==Nd);
assert(FourDimRedBlackGrid._ndimension==Nd+1);
assert(FourDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
// extent of fifth dim and not spread out
Ls=FourDimGrid._fdimensions[0];
assert(FourDimRedBlackGrid._fdimensions[0]==Ls);
assert(FourDimGrid._processors[0] ==1);
assert(FourDimRedBlackGrid._processors[0] ==1);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<Nd;d++){
assert(FourDimGrid._processors[d+1] ==ThreeDimGrid._processors[d]);
assert(FourDimRedBlackGrid._processors[d+1] ==ThreeDimGrid._processors[d]);
assert(ThreeDimRedBlackGrid._processors[d] ==ThreeDimGrid._processors[d]);
assert(FourDimGrid._fdimensions[d+1] ==ThreeDimGrid._fdimensions[d]);
assert(FourDimRedBlackGrid._fdimensions[d+1]==ThreeDimGrid._fdimensions[d]);
assert(ThreeDimRedBlackGrid._fdimensions[d] ==ThreeDimGrid._fdimensions[d]);
assert(FourDimGrid._simd_layout[d+1] ==ThreeDimGrid._simd_layout[d]);
assert(FourDimRedBlackGrid._simd_layout[d+1]==ThreeDimGrid._simd_layout[d]);
assert(ThreeDimRedBlackGrid._simd_layout[d] ==ThreeDimGrid._simd_layout[d]);
}
if ( p.dirichlet.size() == Nd+1) {
Coordinate block = p.dirichlet;
for(int d=0;d<Nd+1;d++) {
if ( block[d] ){
Dirichlet = 1;
std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
Block = block;
}
}
} else {
Coordinate block(Nd+1,0);
Block = block;
}
// Dimension zero of the five-d is the Ls direction
assert(FourDimRedBlackGrid._simd_layout[0]==1);
assert(FourDimGrid._simd_layout[0] ==1);
// Allocate the required comms buffer
ImportGauge(_Umu);
// Build lists of exterior only nodes
int LLs = FourDimGrid._rdimensions[0];
int vol3;
vol3=ThreeDimGrid.oSites();
Stencil.BuildSurfaceList(LLs,vol3);
vol3=ThreeDimRedBlackGrid.oSites();
StencilEven.BuildSurfaceList(LLs,vol3);
StencilOdd.BuildSurfaceList(LLs,vol3);
}
// Import a gauge field: scale it by -0.5, double-store the result into Umu and
// extract the Even / Odd checkerboards into UmuEven / UmuOdd.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::ImportGauge(const GaugeField &_Umu)
{
  GaugeField scaledU(_Umu.Grid());
  scaledU = _Umu*(-0.5);

  Impl::DoubleStore(GaugeGrid(),Umu,scaledU);

  pickCheckerboard(Even,UmuEven,Umu);
  pickCheckerboard(Odd ,UmuOdd ,Umu);
}
// Apply a single hopping leg on the full grid.
//   dir5  direction index that counts the s dimension as 0, so dir5-1 indexes the
//         "directions" table handed to the stencil
//   disp  +1 selects the first Nd stencil legs, any other value the second Nd
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
{
  const int  dir     = dir5-1;   // drop the innermost s dimension
  const bool forward = (disp==1);

  // Stencil leg and projector index sit in opposite halves of the 2*Nd range
  const int dirdisp = forward ? dir      : dir + Nd;
  const int gamma   = forward ? dir + Nd : dir;

  Compressor compressor(DaggerNo);
  Stencil.HaloExchange(in,compressor);

  uint64_t Nsite = Umu.Grid()->oSites();
  Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
}
// Exchange halos for `in`, then let the kernels fill one output field per stencil leg.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
{
  Compressor compressor(DaggerNo);
  Stencil.HaloExchange(in,compressor);

  uint64_t gaugeSites = Umu.Grid()->oSites();
  Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,gaugeSites,in,out);
}
// Accumulate the force contribution of the hopping term into `mat`.
// For each mu < Nd the single-leg kernel is applied to B, and the result is combined
// with A through Impl::InsertForce5D.
//   st, U  stencil and doubled gauge field to use
//   mat    force field updated by Impl::InsertForce5D
//   A, B   fermion fields, both on the stencil's grid
//   dag    DaggerNo or DaggerYes; selects the projector index
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DerivInternal(StencilImpl & st,
                                                      DoubledGaugeField & U,
                                                      GaugeField &mat,
                                                      const FermionField &A,
                                                      const FermionField &B,
                                                      int dag)
{
  assert((dag==DaggerNo) ||(dag==DaggerYes));

  conformable(st.Grid(),A.Grid());
  conformable(st.Grid(),B.Grid());

  Compressor compressor(dag);

  FermionField Btilde(B.Grid());
  FermionField Atilde(B.Grid());

  st.HaloExchange(B,compressor);

  Atilde=A;

  // Site count of the gauge field is the same for every direction
  const int Usites = U.Grid()->oSites();

  for (int mu = 0; mu < Nd; mu++) {
    ////////////////////////////////////////////////////////////////////////
    // Flip gamma if dag
    ////////////////////////////////////////////////////////////////////////
    int gamma = mu;
    if (!dag) gamma += Nd;

    ////////////////////////
    // Call the single hop
    ////////////////////////
    Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);

    ////////////////////////////
    // spin trace outer product
    ////////////////////////////
    Impl::InsertForce5D(mat, Btilde, Atilde, mu);
  }
}
// Force from the hopping term on the full (un-checkerboarded) fermion grid.
// `mat` takes the checkerboard tag of A.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDeriv(GaugeField &mat,
                                                  const FermionField &A,
                                                  const FermionField &B,
                                                  int dag)
{
  conformable(A.Grid(),FermionGrid());
  conformable(A.Grid(),B.Grid());
  // No conformable(GaugeGrid(),mat.Grid()) check here: it is not general.

  mat.Checkerboard() = A.Checkerboard();

  DerivInternal(Stencil,Umu,mat,A,B,dag);
}
// Checkerboarded force term: B must be Odd and A Even; `mat` is tagged Even.
// Uses the Odd-source stencil with the Even gauge links.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivEO(GaugeField &mat,
                                                    const FermionField &A,
                                                    const FermionField &B,
                                                    int dag)
{
  conformable(A.Grid(),FermionRedBlackGrid());
  conformable(A.Grid(),B.Grid());

  assert(B.Checkerboard()==Odd);
  assert(A.Checkerboard()==Even);

  mat.Checkerboard() = Even;
  DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
}
// Checkerboarded force term: B must be Even and A Odd; `mat` is tagged Odd.
// Uses the Even-source stencil with the Odd gauge links.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivOE(GaugeField &mat,
                                                    const FermionField &A,
                                                    const FermionField &B,
                                                    int dag)
{
  conformable(A.Grid(),FermionRedBlackGrid());
  conformable(A.Grid(),B.Grid());

  assert(B.Checkerboard()==Even);
  assert(A.Checkerboard()==Odd);

  mat.Checkerboard() = Odd;
  DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
}
// Dispatch point for the hopping term; currently always takes the serial-comms path.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternal(StencilImpl & st,
                                                     DoubledGaugeField & U,
                                                     const FermionField &in, FermionField &out,int dag)
{
  DhopInternalSerialComms(st,U,in,out,dag);
}
// Hopping term with communication overlapped with the interior computation:
//   1. gather halo data and start communication,
//   2. run the interior kernel (interior=1, exterior=0),
//   3. complete communication and merge the received data,
//   4. run the exterior kernel (interior=0, exterior=1), which adds the halo legs to `out`.
// `dag` selects DhopDagKernel (DaggerYes) or DhopKernel (otherwise).
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
                                                                    DoubledGaugeField & U,
                                                                    const FermionField &in, FermionField &out,int dag)
{
  GRID_TRACE("DhopInternalOverlappedComms");
  Compressor compressor(dag);

  int LLs = in.Grid()->_rdimensions[0];

  /////////////////////////////
  // Start comms  // Gather intranode and extra node differentiated??
  /////////////////////////////
  {
    GRID_TRACE("Gather");
    st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
  }

  std::vector<std::vector<CommsRequest_t> > requests;

#if 1
  /////////////////////////////
  // Overlap with comms
  /////////////////////////////
  st.CommunicateBegin(requests);
  st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
#endif

  /////////////////////////////
  // do the compute interior
  /////////////////////////////
  if (dag == DaggerYes) {
    GRID_TRACE("DhopDagInterior");
    Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
  } else {
    GRID_TRACE("DhopInterior");
    Kernels::DhopKernel (st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
  }

  //ifdef GRID_ACCELERATED
#if 0
  /////////////////////////////
  // Overlap with comms -- on GPU the interior kernel call is nonblocking
  /////////////////////////////
  st.CommunicateBegin(requests);
  st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
#endif

  /////////////////////////////
  // Complete comms
  /////////////////////////////
  st.CommunicateComplete(requests);

  /////////////////////////////
  // do the compute exterior
  /////////////////////////////
  {
    GRID_TRACE("Merge");
    st.CommsMerge(compressor);
  }

  // The kernels take no optimisation selector: the argument list matches the interior calls
  if (dag == DaggerYes) {
    GRID_TRACE("DhopDagExterior");
    Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
  } else {
    GRID_TRACE("DhopExterior");
    Kernels::DhopKernel (st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
  }
}
// Hopping term with blocking communication: complete the halo exchange first,
// then run the all-legs kernel (daggered when dag == DaggerYes).
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalSerialComms(StencilImpl & st,
                                                                DoubledGaugeField & U,
                                                                const FermionField &in,
                                                                FermionField &out,int dag)
{
  GRID_TRACE("DhopInternalSerialComms");
  Compressor compressor(dag);

  // Reduced extent of dimension 0 (the s direction) of the fermion grid
  int LLs = in.Grid()->_rdimensions[0];

  {
    GRID_TRACE("HaloExchange");
    st.HaloExchangeOpt(in,compressor);
  }

  const bool daggered = (dag == DaggerYes);
  if (!daggered) {
    GRID_TRACE("Dhop");
    Kernels::DhopKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
  } else {
    GRID_TRACE("DhopDag");
    Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
  }
}
// Hopping term from an Even source to an Odd result on the red-black grid.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
{
  conformable(in.Grid(),FermionRedBlackGrid()); // must live on the half grid
  conformable(in.Grid(),out.Grid());            // grids match; checkerboard is not compared

  assert(in.Checkerboard()==Even);
  out.Checkerboard() = Odd;

  DhopInternal(StencilEven,UmuOdd,in,out,dag);
}
// Hopping term from an Odd source to an Even result on the red-black grid.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
  conformable(in.Grid(),FermionRedBlackGrid()); // must live on the half grid
  conformable(in.Grid(),out.Grid());            // grids match; checkerboard is not compared

  assert(in.Checkerboard()==Odd);
  out.Checkerboard() = Even;

  DhopInternal(StencilOdd,UmuEven,in,out,dag);
}
// Communication half of a split Dhop: performs only the halo exchange for `in`
// on the full grid (compressor built with dag = 0) and tags `out` with in's checkerboard.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopComms(const FermionField &in, FermionField &out)
{
  conformable(in.Grid(),FermionGrid()); // must live on the full grid
  conformable(in.Grid(),out.Grid());

  out.Checkerboard() = in.Checkerboard();

  const int dag = 0;
  Compressor compressor(dag);
  Stencil.HaloExchangeOpt(in,compressor);
}
// Compute half of a split Dhop: runs the kernel overload that takes the `ids` array
// on the full grid. No halo exchange is performed here.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DhopCalc(const FermionField &in, FermionField &out,uint64_t *ids)
{
  conformable(in.Grid(),FermionGrid()); // must live on the full grid
  conformable(in.Grid(),out.Grid());

  out.Checkerboard() = in.Checkerboard();

  // Reduced extent of dimension 0 (the s direction)
  int LLs = in.Grid()->_rdimensions[0];

  Kernels::DhopKernel(Stencil,Umu,Stencil.CommBuf(),LLs,Umu.oSites(),in,out,ids);
}
// Hopping term on the full grid; `out` takes the checkerboard tag of `in`.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{
  conformable(in.Grid(),FermionGrid()); // must live on the full grid
  conformable(in.Grid(),out.Grid());

  out.Checkerboard() = in.Checkerboard();

  DhopInternal(Stencil,Umu,in,out,dag);
}
// Full operator: out = (Nd - M5) * in + Dhop(in).
// The factor -0.5 of the hopping term is already folded into the stored gauge links.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
{
  out.Checkerboard()=in.Checkerboard();

  Dhop(in,out,dag);

  const auto diag = Nd*1.0-M5;
  axpy(out,diag,in,out);
}
// Off-diagonal (checkerboard changing) part: routes to DhopEO for an Odd source
// and to DhopOE otherwise, without dagger.
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::Meooe(const FermionField &in, FermionField &out)
{
  if (in.Checkerboard() != Odd) {
    DhopOE(in, out, DaggerNo);
    return;
  }
  DhopEO(in, out, DaggerNo);
}
// Daggered off-diagonal part: routes to DhopEO for an Odd source and to DhopOE otherwise.
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
{
  if (in.Checkerboard() != Odd) {
    DhopOE(in, out, DaggerYes);
    return;
  }
  DhopEO(in, out, DaggerYes);
}
// Diagonal part: out = (Nd + M5) * in, keeping the checkerboard of `in`.
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::Mooee(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();

  typename FermionField::scalar_type diag(Nd*1.0 + M5);
  out = diag * in;
}
// Daggered diagonal part; delegates to Mooee.
template <class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();

  Mooee(in, out);
}
// Inverse of the diagonal part: out = in / (Nd + M5), keeping the checkerboard of `in`.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();

  const auto invDiag = 1.0/(Nd*1.0 + M5);
  out = invDiag*in;
}
// Daggered inverse of the diagonal part; delegates to MooeeInv.
template<class Impl>
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();

  MooeeInv(in,out);
}
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,441 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
// One stencil leg, local or remote.
//  - looks up the stencil entry for leg Dir at fermion site sF;
//  - local neighbour: reads it from `in` (with permute) and projects it with spProj into chi;
//  - otherwise: reads chi from the comms buffer `buf` (spProj is not applied on this path);
//  - multiplies by the link with Impl::multLink and hands Uchi to Recon(result, Uchi).
// Expects st, U, buf, in, sF, sU, lane, SE, ptype, chi, Uchi and result in the enclosing scope.
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
acceleratorSynchronise(); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi);
// Interior variant of a stencil leg: only legs whose neighbour is local contribute.
// Non-local legs are skipped entirely, so `result` must be initialised by the caller.
// Expects st, U, in, sF, sU, lane, SE, ptype, chi, Uchi and result in the enclosing scope.
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
} \
acceleratorSynchronise();
// Exterior variant of a stencil leg: only legs whose neighbour is NOT local contribute.
// The neighbour is read from the comms buffer `buf` into a chi declared by the macro itself;
// spProj is accepted for symmetry with the other leg macros but is not used.
// Each contributing leg increments nmu so the caller can tell whether anything was accumulated.
// Expects st, U, buf, sF, sU, lane, SE, ptype, Uchi, result and nmu in the enclosing scope.
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (!SE->_is_local ) { \
auto chi = coalescedRead(buf[SE->_offset],lane); \
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
Recon(result, Uchi); \
nmu++; \
} \
acceleratorSynchronise();
// Body of a single-direction hop. The stencil entry SE must already have been fetched by the caller.
// Note that the link multiply uses the enclosing function's runtime `dir`, not the macro argument Dir
// (Dir is not referenced in the expansion).
// Expects st, U, buf, in, sU, lane, SE, ptype, dir, chi, Uchi and result in the enclosing scope.
#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \
if (SE->_is_local ) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
spProj(chi,tmp); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
acceleratorSynchronise(); \
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
Recon(result, Uchi);
// Runs the single-direction body only when the runtime `gamma` equals Dir.
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
// Daggered hopping term for one fermion site sF (gauge site sU), all legs, local or remote.
// Named GenericDhopSiteDag to match the kernel that DhopDagKernel launches and the
// Generic* naming of the sibling site kernels.
// The first leg assigns `result`, the remaining legs accumulate into it.
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
                                                    SiteSpinor *buf, int sF,
                                                    int sU, const FermionFieldView &in, FermionFieldView &out)
{
  typedef decltype(coalescedRead(in[0])) calcSpinor;
  calcSpinor chi;
  calcSpinor Uchi;
  calcSpinor result;
  StencilEntry *SE;
  int ptype;
  const int Nsimd = SiteSpinor::Nsimd();
  const int lane=acceleratorSIMTlane(Nsimd);

  GENERIC_STENCIL_LEG(Xp,pauliProjXp,pauliAssign);
  GENERIC_STENCIL_LEG(Yp,pauliProjYp,pauliAdd);
  GENERIC_STENCIL_LEG(Zp,pauliProjZp,pauliAdd);
  GENERIC_STENCIL_LEG(Xm,pauliProjXm,pauliAdd);
  GENERIC_STENCIL_LEG(Ym,pauliProjYm,pauliAdd);
  GENERIC_STENCIL_LEG(Zm,pauliProjZm,pauliAdd);

  coalescedWrite(out[sF],result,lane);
}
// Hopping term for one fermion site sF (gauge site sU), all legs, local or remote.
// Relative to the daggered kernel each leg is paired with the opposite-sign projector.
// The first leg assigns `result`, the remaining legs accumulate into it.
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
                                                 SiteSpinor *buf, int sF,
                                                 int sU, const FermionFieldView &in, FermionFieldView &out)
{
  using calcSpinor = decltype(coalescedRead(in[0]));

  const int Nsimd = SiteSpinor::Nsimd();
  const int lane  = acceleratorSIMTlane(Nsimd);

  // Working variables referenced by name inside the leg macro
  StencilEntry *SE;
  int ptype;
  calcSpinor chi;
  calcSpinor Uchi;
  calcSpinor result;

  GENERIC_STENCIL_LEG(Xm,pauliProjXp,pauliAssign);
  GENERIC_STENCIL_LEG(Ym,pauliProjYp,pauliAdd);
  GENERIC_STENCIL_LEG(Zm,pauliProjZp,pauliAdd);
  GENERIC_STENCIL_LEG(Xp,pauliProjXm,pauliAdd);
  GENERIC_STENCIL_LEG(Yp,pauliProjYm,pauliAdd);
  GENERIC_STENCIL_LEG(Zp,pauliProjZm,pauliAdd);

  coalescedWrite(out[sF], result,lane);
}
////////////////////////////////////////////////////////////////////
// Interior kernels
////////////////////////////////////////////////////////////////////
// Daggered hopping term for one site, restricted to legs whose neighbour is local.
// `result` starts from zero and is written to out[sF] even if no leg contributed.
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
                                                       SiteSpinor *buf, int sF,
                                                       int sU, const FermionFieldView &in, FermionFieldView &out)
{
  using calcSpinor = decltype(coalescedRead(in[0]));

  const int Nsimd = SiteSpinor::Nsimd();
  const int lane  = acceleratorSIMTlane(Nsimd);

  // Working variables referenced by name inside the leg macro
  StencilEntry *SE;
  int ptype;
  calcSpinor chi;
  calcSpinor Uchi;
  calcSpinor result;

  result=Zero();
  GENERIC_STENCIL_LEG_INT(Xp,pauliProjXp,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Yp,pauliProjYp,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Zp,pauliProjZp,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Xm,pauliProjXm,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Ym,pauliProjYm,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Zm,pauliProjZm,pauliAdd);

  coalescedWrite(out[sF], result,lane);
}
// Hopping term for one site, restricted to legs whose neighbour is local.
// `result` starts from zero and is written to out[sF] even if no leg contributed.
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
                                                    SiteSpinor *buf, int sF,
                                                    int sU, const FermionFieldView &in, FermionFieldView &out)
{
  using calcSpinor = decltype(coalescedRead(in[0]));

  const int Nsimd = SiteSpinor::Nsimd();
  const int lane  = acceleratorSIMTlane(Nsimd);

  // Working variables referenced by name inside the leg macro
  StencilEntry *SE;
  int ptype;
  calcSpinor chi;
  calcSpinor Uchi;
  calcSpinor result;

  result=Zero();
  GENERIC_STENCIL_LEG_INT(Xm,pauliProjXp,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Ym,pauliProjYp,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Zm,pauliProjZp,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Xp,pauliProjXm,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Yp,pauliProjYm,pauliAdd);
  GENERIC_STENCIL_LEG_INT(Zp,pauliProjZm,pauliAdd);

  coalescedWrite(out[sF], result,lane);
}
////////////////////////////////////////////////////////////////////
// Exterior kernels
////////////////////////////////////////////////////////////////////
// Daggered hopping term for one site, restricted to legs whose neighbour comes from the
// comms buffer. The contribution is ADDED to the value already in out[sF], and only when
// at least one leg was non-local.
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
                                                       SiteSpinor *buf, int sF,
                                                       int sU, const FermionFieldView &in, FermionFieldView &out)
{
  using calcSpinor = decltype(coalescedRead(in[0]));

  const int Nsimd = SiteSpinor::Nsimd();
  const int lane  = acceleratorSIMTlane(Nsimd);

  // Working variables referenced by name inside the leg macro (chi is declared by the macro)
  StencilEntry *SE;
  int ptype;
  int nmu=0;   // number of non-local legs seen
  calcSpinor Uchi;
  calcSpinor result;

  result=Zero();
  GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXp,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYp,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZp,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXm,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYm,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZm,pauliAdd);

  if ( nmu != 0 ) {
    auto accum = coalescedRead(out[sF],lane);
    accum = accum + result;
    coalescedWrite(out[sF],accum,lane);
  }
}
// Hopping term for one site, restricted to legs whose neighbour comes from the comms buffer.
// The contribution is ADDED to the value already in out[sF], and only when at least one
// leg was non-local.
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
                                                    SiteSpinor *buf, int sF,
                                                    int sU, const FermionFieldView &in, FermionFieldView &out)
{
  using calcSpinor = decltype(coalescedRead(in[0]));

  const int Nsimd = SiteSpinor::Nsimd();
  const int lane  = acceleratorSIMTlane(Nsimd);

  // Working variables referenced by name inside the leg macro (chi is declared by the macro)
  StencilEntry *SE;
  int ptype;
  int nmu=0;   // number of non-local legs seen
  calcSpinor Uchi;
  calcSpinor result;

  result=Zero();
  GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXp,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYp,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZp,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXm,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYm,pauliAdd);
  GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZm,pauliAdd);

  if ( nmu != 0 ) {
    auto accum = coalescedRead(out[sF],lane);
    accum = accum + result;
    coalescedWrite(out[sF],accum,lane);
  }
}
// Generates the member TwoSpinWilsonKernels<Impl>::DhopDir<Dir>: a single-site, single-leg hop.
// The stencil leg is chosen by the runtime argument `dir`; the macro argument Dir only selects
// the function name, while spProj / spRecon fix the projector and how `result` is formed.
// The result is written to out[sF].
#define DhopDirMacro(Dir,spProj,spRecon) \
template <class Impl> accelerator_inline \
void TwoSpinWilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF, \
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
{ \
typedef decltype(coalescedRead(in[0])) calcSpinor; \
calcSpinor chi; \
calcSpinor result; \
calcSpinor Uchi; \
StencilEntry *SE; \
int ptype; \
const int Nsimd = SiteSpinor::Nsimd(); \
const int lane=acceleratorSIMTlane(Nsimd); \
\
SE = st.GetEntry(ptype, dir, sF); \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \
coalescedWrite(out[sF], result,lane); \
}
// One single-leg kernel per projector; each assigns (rather than accumulates) its result.
DhopDirMacro(Xp,pauliProjXp,pauliAssign);
DhopDirMacro(Yp,pauliProjYp,pauliAssign);
DhopDirMacro(Zp,pauliProjZp,pauliAssign);
DhopDirMacro(Xm,pauliProjXm,pauliAssign);
DhopDirMacro(Ym,pauliProjYm,pauliAssign);
DhopDirMacro(Zm,pauliProjZm,pauliAssign);
// Single-site, single-leg hop with the projector chosen at run time.
//   dir    stencil leg to fetch
//   gamma  compared against Xp..Zm to pick the projector; `result` is only assigned
//          when gamma matches one of those six values
template <class Impl> accelerator_inline
void TwoSpinWilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF,
                                           int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
  using calcSpinor = decltype(coalescedRead(in[0]));

  const int Nsimd = SiteSpinor::Nsimd();
  const int lane  = acceleratorSIMTlane(Nsimd);

  // Working variables referenced by name inside the leg macro
  int ptype;
  calcSpinor chi;
  calcSpinor Uchi;
  calcSpinor result;
  StencilEntry *SE = st.GetEntry(ptype, dir, sF);

  GENERIC_DHOPDIR_LEG(Xp,pauliProjXp,pauliAssign);
  GENERIC_DHOPDIR_LEG(Yp,pauliProjYp,pauliAssign);
  GENERIC_DHOPDIR_LEG(Zp,pauliProjZp,pauliAssign);
  GENERIC_DHOPDIR_LEG(Xm,pauliProjXm,pauliAssign);
  GENERIC_DHOPDIR_LEG(Ym,pauliProjYm,pauliAssign);
  GENERIC_DHOPDIR_LEG(Zm,pauliProjZm,pauliAssign);

  coalescedWrite(out[sF], result,lane);
}
// Apply every single-leg hop in one accelerator loop over Nsite*Ls fermion sites,
// writing each leg into its own output field.
// The comms buffer is taken from st.CommBuf(); the `buf` argument is not used.
// NOTE(review): the backward legs use stencil legs 0..2 and output slots 0..2, while the
// forward legs use stencil legs 3..5 but output slots 4..6 -- confirm the intended layout
// of `out` (slot 3 is never written).
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
                                             int Nsite, const FermionField &in, std::vector<FermionField> &out)
{
  autoView(gauge_v  ,U ,AcceleratorRead);
  autoView(src_v    ,in,AcceleratorRead);
  autoView(stencil_v,st,AcceleratorRead);

  autoView(dst_Xm,out[0],AcceleratorWrite);
  autoView(dst_Ym,out[1],AcceleratorWrite);
  autoView(dst_Zm,out[2],AcceleratorWrite);
  autoView(dst_Xp,out[4],AcceleratorWrite);
  autoView(dst_Yp,out[5],AcceleratorWrite);
  autoView(dst_Zp,out[6],AcceleratorWrite);

  auto commBuf=st.CommBuf();
  accelerator_for(sss,Nsite*Ls,Simd::Nsimd(),{
      int sF=sss;     // fermion site
      int sU=sss/Ls;  // gauge site shared by the Ls fermion sites
      DhopDirXm(stencil_v,gauge_v,commBuf,sF,sU,src_v,dst_Xm,0);
      DhopDirYm(stencil_v,gauge_v,commBuf,sF,sU,src_v,dst_Ym,1);
      DhopDirZm(stencil_v,gauge_v,commBuf,sF,sU,src_v,dst_Zm,2);
      DhopDirXp(stencil_v,gauge_v,commBuf,sF,sU,src_v,dst_Xp,3);
      DhopDirYp(stencil_v,gauge_v,commBuf,sF,sU,src_v,dst_Yp,4);
      DhopDirZp(stencil_v,gauge_v,commBuf,sF,sU,src_v,dst_Zp,5);
  });
}
// Apply one stencil leg over the whole lattice.
//   dirdisp  stencil leg, asserted to lie in [0,5]
//   gamma    selects which DhopDir<Dir> single-leg kernel is launched; any value other than
//            Xp, Yp, Zp, Xm, Ym, Zm hits assert(0)
// The comms buffer is taken from st.CommBuf(); the `buf` argument is not used.
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
{
assert(dirdisp<=5);
assert(dirdisp>=0);
autoView(U_v ,U ,AcceleratorRead);
autoView(in_v ,in ,AcceleratorRead);
autoView(out_v,out,AcceleratorWrite);
autoView(st_v ,st ,AcceleratorRead);
auto CBp=st.CommBuf();
// LoopBody(Dir) expands to one switch case: an accelerator loop over the Nsite gauge sites,
// with an inner serial loop over s, so fermion site sF = s + Ls*sU.
#define LoopBody(Dir) \
case Dir : \
accelerator_for(ss,Nsite,Simd::Nsimd(),{ \
for(int s=0;s<Ls;s++){ \
int sU=ss; \
int sF = s+Ls*sU; \
DhopDir##Dir(st_v,U_v,CBp,sF,sU,in_v,out_v,dirdisp);\
} \
}); \
break;
switch(gamma){
LoopBody(Xp);
LoopBody(Yp);
LoopBody(Zp);
LoopBody(Xm);
LoopBody(Ym);
LoopBody(Zm);
default:
assert(0);
break;
}
#undef LoopBody
}
// Kernel launch helpers. All expect st, st_v, U_v, in_v, out_v, buf, Ls and Nsite in scope.
// KERNEL_CALLNB : launch site kernel A over all Nsite*Ls fermion sites via accelerator_forNB
//                 (no barrier is issued by this macro).
#define KERNEL_CALLNB(A) \
const uint64_t NN = Nsite*Ls; \
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
int sF = ss; \
int sU = ss/Ls; \
TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
// KERNEL_CALL : KERNEL_CALLNB followed by accelerator_barrier().
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
// KERNEL_CALL_EXT : launch site kernel A only over the fermion sites listed in
//                   st.surface_list, then issue accelerator_barrier().
#define KERNEL_CALL_EXT(A) \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \
int sU = sF/Ls; \
TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
}); \
accelerator_barrier();
// Launch the hopping term over the lattice.
//   interior && exterior : all legs in one pass (with barrier)
//   interior only        : local legs only, launched without a barrier
//   exterior only        : non-local legs over the stencil's surface list (with barrier)
// Any other combination is a programming error and asserts.
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
                                            int Ls, int Nsite, const FermionField &in, FermionField &out,
                                            int interior,int exterior)
{
  autoView(U_v  ,  U,AcceleratorRead);
  autoView(in_v , in,AcceleratorRead);
  autoView(out_v,out,AcceleratorWrite);
  autoView(st_v , st,AcceleratorRead);

  if( interior && exterior ) {
    acceleratorFenceComputeStream();
    KERNEL_CALL(GenericDhopSite);
    return;
  }

  if( interior ) {
    KERNEL_CALLNB(GenericDhopSiteInt);
    return;
  }

  if( exterior ) {
    // Depends on the result of the comms merge
    acceleratorFenceComputeStream();
    KERNEL_CALL_EXT(GenericDhopSiteExt);
    return;
  }

  assert(0 && " Kernel optimisation case not covered ");
}
// Launch the daggered hopping term over the lattice.
//   interior && exterior : all legs in one pass (with barrier)
//   interior only        : local legs only, launched without a barrier
//   exterior only        : non-local legs over the stencil's surface list (with barrier)
// Any other combination is a programming error and asserts.
template <class Impl>
void TwoSpinWilsonKernels<Impl>::DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
                                               int Ls, int Nsite, const FermionField &in, FermionField &out,
                                               int interior,int exterior)
{
  autoView(U_v  ,U  ,AcceleratorRead);
  autoView(in_v ,in ,AcceleratorRead);
  autoView(out_v,out,AcceleratorWrite);
  autoView(st_v ,st ,AcceleratorRead);

  if( interior && exterior ) {
    acceleratorFenceComputeStream();
    KERNEL_CALL(GenericDhopSiteDag);
    return;
  }

  if( interior ) {
    KERNEL_CALLNB(GenericDhopSiteDagInt);
    return;
  }

  if( exterior ) {
    // Depends on the result of the comms merge
    acceleratorFenceComputeStream();
    KERNEL_CALL_EXT(GenericDhopSiteDagExt);
    return;
  }

  assert(0 && " Kernel optimisation case not covered ");
}
// Keep the kernel launch helper macros local to this file, so that another
// implementation header defining the same names does not collide with them.
#undef KERNEL_CALLNB
#undef KERNEL_CALL
#undef KERNEL_CALL_EXT
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,61 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h>
NAMESPACE_BEGIN(Grid);
// S-direction is INNERMOST and takes no part in the parity.
// Stencil leg tables, built once at static-initialisation time: for each of the 2*Nd legs,
// the dimension it moves along and the signed displacement.
const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::directions (TwoSpinWilsonFermion3plus1DStatic::MakeDirections());
const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::displacements(TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements());
// Dimension index for each of the 2*Nd stencil legs.
// Legs d and d+Nd both move along dimension d+1; dimension 0 (the s direction) is never used.
std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDirections (void)
{
  const int nleg = 2*Nd;
  std::vector<int> legDimension(nleg);
  for(int leg=0;leg<nleg;leg++){
    legDimension[leg] = (leg % Nd) + 1;
  }
  return legDimension;
}
// Signed displacement for each of the 2*Nd stencil legs:
// +1 for the first Nd legs, -1 for the second Nd legs.
std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements(void)
{
  const int nleg = 2*Nd;
  std::vector<int> legShift(nleg);
  for(int leg=0;leg<nleg;leg++){
    legShift[leg] = (leg < Nd) ? +1 : -1;
  }
  return legShift;
}
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,40 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h>
NAMESPACE_BEGIN(Grid);
// impl.h is included inside the Grid namespace. IMPLEMENTATION is not defined in this
// file; presumably impl.h supplies it -- TODO confirm.
#include "impl.h"
// Explicit instantiation of TwoSpinWilsonFermion3plus1D for that implementation.
template class TwoSpinWilsonFermion3plus1D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@@ -0,0 +1,40 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h>
NAMESPACE_BEGIN(Grid);
// impl.h is included inside the Grid namespace. IMPLEMENTATION is not defined in this
// file; presumably impl.h supplies it -- TODO confirm.
#include "impl.h"
// Explicit instantiation of TwoSpinWilsonKernels for that implementation.
template class TwoSpinWilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

220
Grid/qcd/spin/Pauli.h Normal file
View File

@@ -0,0 +1,220 @@
#ifndef GRID_QCD_PAULI_H
#define GRID_QCD_PAULI_H
#include <array>
NAMESPACE_BEGIN(Grid);
//
/*
* Pauli basis
*   sx        sy         sz         ident
* (0 1) ,  (0 -i) ,  ( 1  0 ) ,  (1 0)
* (1 0)    (i  0)    ( 0 -1 )    (0 1)
*
* These are hermitian.
*
* Also supply wilson "projectors" (1+/-sx), (1+/-sy), (1+/-sz)
*
* pauliProjXp / pauliProjXm
* pauliProjYp / pauliProjYm etc...
*/
// Lightweight tag naming one signed element of the Pauli basis (+/- sigma_x, sigma_y,
// sigma_z, identity). The operator* overloads later in this header switch on the stored
// tag `g` to pick the matching multiply routine.
class Pauli {
public:
// The eight signed basis elements; each Minus* entry directly follows its positive partner.
GRID_SERIALIZABLE_ENUM(Algebra, undef,
SigmaX , 0,
MinusSigmaX , 1,
SigmaY , 2,
MinusSigmaY , 3,
SigmaZ , 4,
MinusSigmaZ , 5,
Identity , 6,
MinusIdentity , 7);
// Number of named Algebra entries listed above.
static constexpr unsigned int nPauli = 8;
// Lookup tables: declared here, defined elsewhere. Judging by their names they hold the
// printable names, the product table and the adjoint map -- TODO confirm against the definitions.
static const std::array<const char *, nPauli> name;
static const std::array<std::array<Algebra, nPauli>, nPauli> mul;
static const std::array<Algebra, nPauli> adj;
// NOTE(review): gmu and gall are sized 4 and 16 although nPauli is 8 -- confirm these
// sizes are intended for the Pauli algebra.
static const std::array<const Pauli, 4> gmu;
static const std::array<const Pauli, 16> gall;
// The selected algebra element.
Algebra g;
public:
// Non-explicit: an Algebra value converts implicitly to a Pauli.
accelerator Pauli(Algebra initg): g(initg) {}
};
// Generates a Pauli-named wrapper that forwards straight to an existing flavour routine
// of the same shape.
//   tensorKind : iVector or iMatrix
//   pauliFn    : name of the wrapper to generate
//   flavourFn  : routine the wrapper calls with (ret, rhs)
#define CopyImplementation(tensorKind,pauliFn,flavourFn) \
  template<class vtype> \
  accelerator_inline void pauliFn(tensorKind<vtype, Nhs> &ret, \
                                  const tensorKind<vtype, Nhs> &rhs) \
  { \
    flavourFn(ret,rhs); \
  }

// Wrappers acting on a vector.
CopyImplementation(iVector,multPauliSigmaX       ,multFlavourSigmaX);
CopyImplementation(iVector,multPauliMinusSigmaX  ,multFlavourMinusSigmaX);
CopyImplementation(iVector,multPauliSigmaY       ,multFlavourSigmaY);
CopyImplementation(iVector,multPauliMinusSigmaY  ,multFlavourMinusSigmaY);
CopyImplementation(iVector,multPauliSigmaZ       ,multFlavourSigmaZ);
CopyImplementation(iVector,multPauliMinusSigmaZ  ,multFlavourMinusSigmaZ);
CopyImplementation(iVector,multPauliIdentity     ,multFlavourIdentity);
CopyImplementation(iVector,multPauliMinusIdentity,multFlavourMinusIdentity);

// Wrappers for the lmult* family on a matrix.
CopyImplementation(iMatrix,lmultPauliSigmaX       ,lmultFlavourSigmaX);
CopyImplementation(iMatrix,lmultPauliMinusSigmaX  ,lmultFlavourMinusSigmaX);
CopyImplementation(iMatrix,lmultPauliSigmaY       ,lmultFlavourSigmaY);
CopyImplementation(iMatrix,lmultPauliMinusSigmaY  ,lmultFlavourMinusSigmaY);
CopyImplementation(iMatrix,lmultPauliSigmaZ       ,lmultFlavourSigmaZ);
CopyImplementation(iMatrix,lmultPauliMinusSigmaZ  ,lmultFlavourMinusSigmaZ);
CopyImplementation(iMatrix,lmultPauliIdentity     ,lmultFlavourIdentity);
CopyImplementation(iMatrix,lmultPauliMinusIdentity,lmultFlavourMinusIdentity);

// Wrappers for the rmult* family on a matrix.
CopyImplementation(iMatrix,rmultPauliSigmaX       ,rmultFlavourSigmaX);
CopyImplementation(iMatrix,rmultPauliMinusSigmaX  ,rmultFlavourMinusSigmaX);
CopyImplementation(iMatrix,rmultPauliSigmaY       ,rmultFlavourSigmaY);
CopyImplementation(iMatrix,rmultPauliMinusSigmaY  ,rmultFlavourMinusSigmaY);
CopyImplementation(iMatrix,rmultPauliSigmaZ       ,rmultFlavourSigmaZ);
CopyImplementation(iMatrix,rmultPauliMinusSigmaZ  ,rmultFlavourMinusSigmaZ);
CopyImplementation(iMatrix,rmultPauliIdentity     ,rmultFlavourIdentity);
CopyImplementation(iMatrix,rmultPauliMinusIdentity,rmultFlavourMinusIdentity);
/*
* sx sy sz ident
* (0 1) , (0 -i) , ( 1 0 )
* (1 0) (i 0) ( 0 -1)
*/
// Applies (1 + sigma_x): both output components become fspin(0)+fspin(1).
//   hspin : result
//   fspin : input
// Input and output have the same type, so a caller may pass the same object for both.
// fspin(0) is therefore copied before hspin(0) is written; otherwise hspin(1) would be
// computed from the already-overwritten component.
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
  const auto upper = fspin(0);
  hspin(0)=upper+fspin(1);
  hspin(1)=fspin(1)+upper;
}
// Applies (1 - sigma_x): hspin(0) = fspin(0)-fspin(1), hspin(1) = fspin(1)-fspin(0).
//   hspin : result
//   fspin : input
// Input and output have the same type, so a caller may pass the same object for both.
// fspin(0) is therefore copied before hspin(0) is written; otherwise hspin(1) would be
// computed from the already-overwritten component.
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
  const auto upper = fspin(0);
  hspin(0)=upper-fspin(1);
  hspin(1)=fspin(1)-upper;
}
// Applies (1 + sigma_y): hspin(0) = fspin(0) - i*fspin(1), hspin(1) = fspin(1) + i*fspin(0).
//   hspin : result
//   fspin : input
// Input and output have the same type, so a caller may pass the same object for both.
// fspin(0) is therefore copied before hspin(0) is written; otherwise hspin(1) would be
// computed from the already-overwritten component.
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
  const auto upper = fspin(0);
  hspin(0)=upper-timesI(fspin(1));
  hspin(1)=fspin(1)+timesI(upper);
}
// Applies (1 - sigma_y): hspin(0) = fspin(0) + i*fspin(1), hspin(1) = fspin(1) - i*fspin(0).
//   hspin : result
//   fspin : input
// Input and output have the same type, so a caller may pass the same object for both.
// fspin(0) is therefore copied before hspin(0) is written; otherwise hspin(1) would be
// computed from the already-overwritten component.
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
  const auto upper = fspin(0);
  hspin(0)=upper+timesI(fspin(1));
  hspin(1)=fspin(1)-timesI(upper);
}
// Applies (1 + sigma_z): component 0 is doubled, component 1 is set to zero.
//   hspin : result
//   fspin : input
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0>
accelerator_inline void pauliProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
  // The two writes touch different components and the second reads only fspin(0),
  // so their order does not matter.
  hspin(1)=Zero();
  hspin(0)=fspin(0)+fspin(0);
}
// Applies (1 - sigma_z): component 0 is set to zero, component 1 is doubled.
//   hspin : result
//   fspin : input
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0>
accelerator_inline void pauliProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
{
  // The two writes touch different components and the first reads only fspin(1),
  // so their order does not matter.
  hspin(1)=fspin(1)+fspin(1);
  hspin(0)=Zero();
}
// Overwrites fspin with hspin, component by component.
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0>
accelerator_inline void pauliAssign(iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
{
  for(int s=0;s<Nhs;s++){
    fspin(s) = hspin(s);
  }
}
// Accumulates hspin into fspin, component by component.
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0>
accelerator_inline void pauliAdd (iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
{
  for(int s=0;s<Nhs;s++){
    fspin(s) = fspin(s) + hspin(s);
  }
}
// Pauli element times a vector: dispatches on G.g to the matching multPauli* routine and
// returns what that routine wrote.
// Participates in overload resolution only when
// matchGridTensorIndex<iVector<vtype,Nhs>,PauliIndex>::value is true.
template<class vtype>
accelerator_inline auto operator*(const Pauli &G, const iVector<vtype, Nhs> &arg)
  ->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Nhs>, PauliIndex>::value, iVector<vtype, Nhs>>::type
{
  iVector<vtype, Nhs> product;
  switch (G.g) {
  case Pauli::Algebra::SigmaX:        { multPauliSigmaX(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaX:   { multPauliMinusSigmaX(product, arg);   break; }
  case Pauli::Algebra::SigmaY:        { multPauliSigmaY(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaY:   { multPauliMinusSigmaY(product, arg);   break; }
  case Pauli::Algebra::SigmaZ:        { multPauliSigmaZ(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaZ:   { multPauliMinusSigmaZ(product, arg);   break; }
  case Pauli::Algebra::Identity:      { multPauliIdentity(product, arg);      break; }
  case Pauli::Algebra::MinusIdentity: { multPauliMinusIdentity(product, arg); break; }
  // Any Algebra value not listed above trips the assert.
  default:                            { assert(0); }
  }
  return product;
}
// Pauli element on the left of a matrix: dispatches on G.g to the matching lmultPauli*
// routine and returns what that routine wrote.
// Participates in overload resolution only when
// matchGridTensorIndex<iMatrix<vtype,Nhs>,PauliIndex>::value is true.
template<class vtype>
accelerator_inline auto operator*(const Pauli &G, const iMatrix<vtype, Nhs> &arg)
  ->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
{
  iMatrix<vtype, Nhs> product;
  switch (G.g) {
  case Pauli::Algebra::SigmaX:        { lmultPauliSigmaX(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaX:   { lmultPauliMinusSigmaX(product, arg);   break; }
  case Pauli::Algebra::SigmaY:        { lmultPauliSigmaY(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaY:   { lmultPauliMinusSigmaY(product, arg);   break; }
  case Pauli::Algebra::SigmaZ:        { lmultPauliSigmaZ(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaZ:   { lmultPauliMinusSigmaZ(product, arg);   break; }
  case Pauli::Algebra::Identity:      { lmultPauliIdentity(product, arg);      break; }
  case Pauli::Algebra::MinusIdentity: { lmultPauliMinusIdentity(product, arg); break; }
  // Any Algebra value not listed above trips the assert.
  default:                            { assert(0); }
  }
  return product;
}
// Pauli element on the right of a matrix: dispatches on G.g to the matching rmultPauli*
// routine and returns what that routine wrote.
// Participates in overload resolution only when
// matchGridTensorIndex<iMatrix<vtype,Nhs>,PauliIndex>::value is true.
template<class vtype>
accelerator_inline auto operator*(const iMatrix<vtype, Nhs> &arg, const Pauli &G)
  ->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
{
  iMatrix<vtype, Nhs> product;
  switch (G.g) {
  case Pauli::Algebra::SigmaX:        { rmultPauliSigmaX(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaX:   { rmultPauliMinusSigmaX(product, arg);   break; }
  case Pauli::Algebra::SigmaY:        { rmultPauliSigmaY(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaY:   { rmultPauliMinusSigmaY(product, arg);   break; }
  case Pauli::Algebra::SigmaZ:        { rmultPauliSigmaZ(product, arg);        break; }
  case Pauli::Algebra::MinusSigmaZ:   { rmultPauliMinusSigmaZ(product, arg);   break; }
  case Pauli::Algebra::Identity:      { rmultPauliIdentity(product, arg);      break; }
  case Pauli::Algebra::MinusIdentity: { rmultPauliMinusIdentity(product, arg); break; }
  // Any Algebra value not listed above trips the assert.
  default:                            { assert(0); }
  }
  return product;
}
NAMESPACE_END(Grid);
#endif // GRID_QCD_PAULI_H

5
TODO
View File

@@ -1,3 +1,8 @@
* Clean up the extract merge and replace with insertLane/extractLane
-----
i) Refine subspace with HDCG & recompute
ii) Block Lanczos in coarse space
iii) Batched block project in the operator computation

View File

@@ -0,0 +1,93 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_icosahedron.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
// Number of components, minus one, carried by the field type below.
const int MyNd=3;
// Field element: a vector of MyNd+1 entries, each a doubly nested scalar of vtype.
template<typename vtype>
using iIcosahedralLorentzComplex = iVector<iScalar<iScalar<vtype> >, MyNd+1 > ;
// Instantiations on ComplexD and vComplexD, and the lattice built on the latter.
using IcosahedralLorentzComplexD        = iIcosahedralLorentzComplex<ComplexD >;
using vIcosahedralLorentzComplexD       = iIcosahedralLorentzComplex<vComplexD>;
using LatticeIcosahedralLorentzComplexD = Lattice<vIcosahedralLorentzComplexD>;
// Smoke test for the icosahedral grids: builds an edge grid and a vertex grid, fills one
// field on each with ones, prints each norm next to a hand-written expected count, then
// exercises LatticePole and LatticeCoordinate on the vertex field. Nothing is asserted;
// the output is meant to be inspected by eye.
int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  // Put SIMD all in time direction
  Coordinate latt_size = GridDefaultLatt();
  Coordinate simd_layout({1,1,vComplexD::Nsimd(),1});
  Coordinate mpi_layout = GridDefaultMpi();
  std::cout << GridLogMessage << " mpi "<<mpi_layout<<std::endl;
  std::cout << GridLogMessage << " simd "<<simd_layout<<std::endl;
  std::cout << GridLogMessage << " latt "<<latt_size<<std::endl;

  // Two grids over the same layout, differing only in the mesh type.
  GridCartesianCrossIcosahedron EdgeGrid(latt_size,simd_layout,mpi_layout,IcosahedralEdges);
  std::cout << GridLogMessage << " Created edge grid "<<std::endl;
  GridCartesianCrossIcosahedron VertexGrid(latt_size,simd_layout,mpi_layout,IcosahedralVertices);
  std::cout << GridLogMessage << " Created vertex grid "<<std::endl;

  LatticeIcosahedralLorentzComplexD Umu(&EdgeGrid);
  LatticeComplex Phi(&VertexGrid);
  std::cout << GridLogMessage << " Created two fields "<<std::endl;

  Phi = Zero();
  Umu = Zero();
  std::cout << GridLogMessage << " Zeroed two fields "<<std::endl;

  // With every entry set to one, norm2 counts entries; print it beside the expected count.
  ComplexD one (1.0);
  Phi = one;
  Umu = one;
  std::cout << GridLogMessage << " V = "<<norm2(Phi)<<std::endl;
  std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10+2*latt_size[2]<<std::endl;
  std::cout << GridLogMessage << " E = "<<norm2(Umu)<<std::endl;
  std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10*4<<std::endl;
  // std::cout << " Umu "<<Umu<<std::endl;
  // std::cout << " Phi "<<Phi<<std::endl;

  LatticePole(Phi,South);
  std::cout << " Phi South Pole set\n"<<Phi<<std::endl;
  LatticePole(Phi,North);
  std::cout << " Phi North Pole set\n"<<Phi<<std::endl;

  for(int mu=0;mu<VertexGrid._ndimension;mu++){
    std::cout << " Calling lattice coordinate mu="<<mu<<std::endl;
    LatticeCoordinate(Phi,mu);
    std::cout << " Phi coor mu="<<mu<<"\n"<<Phi<<std::endl;
  }

  Grid_finalize();
}