mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Merge 1dfaa08afb
into b8a7004365
This commit is contained in:
commit
e28cf2b8be
@ -47,3 +47,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/lattice/Lattice_transfer.h>
|
||||
#include <Grid/lattice/Lattice_basis.h>
|
||||
#include <Grid/lattice/Lattice_crc.h>
|
||||
#include <Grid/lattice/PaddedCell.h>
|
||||
|
@ -697,8 +697,68 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
||||
for(int d=0;d<nd;d++){
|
||||
assert(Fg->_processors[d] == Tg->_processors[d]);
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
|
||||
#if 1
|
||||
|
||||
size_t nsite = 1;
|
||||
for(int i=0;i<nd;i++) nsite *= RegionSize[i];
|
||||
|
||||
size_t tbytes = 4*nsite*sizeof(int);
|
||||
int *table = (int*)malloc(tbytes);
|
||||
|
||||
thread_for(idx, nsite, {
|
||||
Coordinate from_coor, to_coor;
|
||||
size_t rem = idx;
|
||||
for(int i=0;i<nd;i++){
|
||||
size_t base_i = rem % RegionSize[i]; rem /= RegionSize[i];
|
||||
from_coor[i] = base_i + FromLowerLeft[i];
|
||||
to_coor[i] = base_i + ToLowerLeft[i];
|
||||
}
|
||||
|
||||
int foidx = Fg->oIndex(from_coor);
|
||||
int fiidx = Fg->iIndex(from_coor);
|
||||
int toidx = Tg->oIndex(to_coor);
|
||||
int tiidx = Tg->iIndex(to_coor);
|
||||
int* tt = table + 4*idx;
|
||||
tt[0] = foidx;
|
||||
tt[1] = fiidx;
|
||||
tt[2] = toidx;
|
||||
tt[3] = tiidx;
|
||||
});
|
||||
|
||||
int* table_d = (int*)acceleratorAllocDevice(tbytes);
|
||||
acceleratorCopyToDevice(table,table_d,tbytes);
|
||||
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
autoView(from_v,From,AcceleratorRead);
|
||||
autoView(to_v,To,AcceleratorWrite);
|
||||
|
||||
accelerator_for(idx,nsite,1,{
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int* tt = table_d + 4*idx;
|
||||
int from_oidx = *tt++;
|
||||
int from_lane = *tt++;
|
||||
int to_oidx = *tt++;
|
||||
int to_lane = *tt;
|
||||
|
||||
const vector_type* from = (const vector_type *)&from_v[from_oidx];
|
||||
vector_type* to = (vector_type *)&to_v[to_oidx];
|
||||
|
||||
scalar_type stmp;
|
||||
for(int w=0;w<words;w++){
|
||||
stmp = getlane(from[w], from_lane);
|
||||
putlane(to[w], stmp, to_lane);
|
||||
}
|
||||
});
|
||||
|
||||
acceleratorFreeDevice(table_d);
|
||||
free(table);
|
||||
|
||||
|
||||
#else
|
||||
Coordinate ldf = Fg->_ldimensions;
|
||||
Coordinate rdf = Fg->_rdimensions;
|
||||
Coordinate isf = Fg->_istride;
|
||||
@ -738,6 +798,8 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
||||
#endif
|
||||
}
|
||||
});
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -830,6 +892,8 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
|
||||
}
|
||||
|
||||
|
||||
//Insert subvolume orthogonal to direction 'orthog' with slice index 'slice_lo' from 'lowDim' onto slice index 'slice_hi' of higherDim
|
||||
//The local dimensions of both 'lowDim' and 'higherDim' orthogonal to 'orthog' should be the same
|
||||
template<class vobj>
|
||||
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
{
|
||||
@ -851,6 +915,65 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
||||
}
|
||||
}
|
||||
|
||||
#if 1
|
||||
size_t nsite = lg->lSites()/lg->LocalDimensions()[orthog];
|
||||
size_t tbytes = 4*nsite*sizeof(int);
|
||||
int *table = (int*)malloc(tbytes);
|
||||
|
||||
thread_for(idx,nsite,{
|
||||
Coordinate lcoor(nl);
|
||||
Coordinate hcoor(nh);
|
||||
lcoor[orthog] = slice_lo;
|
||||
hcoor[orthog] = slice_hi;
|
||||
size_t rem = idx;
|
||||
for(int mu=0;mu<nl;mu++){
|
||||
if(mu != orthog){
|
||||
int xmu = rem % lg->LocalDimensions()[mu]; rem /= lg->LocalDimensions()[mu];
|
||||
lcoor[mu] = hcoor[mu] = xmu;
|
||||
}
|
||||
}
|
||||
int loidx = lg->oIndex(lcoor);
|
||||
int liidx = lg->iIndex(lcoor);
|
||||
int hoidx = hg->oIndex(hcoor);
|
||||
int hiidx = hg->iIndex(hcoor);
|
||||
int* tt = table + 4*idx;
|
||||
tt[0] = loidx;
|
||||
tt[1] = liidx;
|
||||
tt[2] = hoidx;
|
||||
tt[3] = hiidx;
|
||||
});
|
||||
|
||||
int* table_d = (int*)acceleratorAllocDevice(tbytes);
|
||||
acceleratorCopyToDevice(table,table_d,tbytes);
|
||||
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
autoView(lowDim_v,lowDim,AcceleratorRead);
|
||||
autoView(higherDim_v,higherDim,AcceleratorWrite);
|
||||
|
||||
accelerator_for(idx,nsite,1,{
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int* tt = table_d + 4*idx;
|
||||
int from_oidx = *tt++;
|
||||
int from_lane = *tt++;
|
||||
int to_oidx = *tt++;
|
||||
int to_lane = *tt;
|
||||
|
||||
const vector_type* from = (const vector_type *)&lowDim_v[from_oidx];
|
||||
vector_type* to = (vector_type *)&higherDim_v[to_oidx];
|
||||
|
||||
scalar_type stmp;
|
||||
for(int w=0;w<words;w++){
|
||||
stmp = getlane(from[w], from_lane);
|
||||
putlane(to[w], stmp, to_lane);
|
||||
}
|
||||
});
|
||||
|
||||
acceleratorFreeDevice(table_d);
|
||||
free(table);
|
||||
|
||||
#else
|
||||
// the above should guarantee that the operations are local
|
||||
autoView(lowDimv,lowDim,CpuRead);
|
||||
autoView(higherDimv,higherDim,CpuWrite);
|
||||
@ -866,6 +989,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
||||
pokeLocalSite(s,higherDimv,hcoor);
|
||||
}
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -26,14 +26,32 @@ Author: Peter Boyle pboyle@bnl.gov
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include<Grid/cshift/Cshift.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//Allow the user to specify how the C-shift is performed, e.g. to respect the appropriate boundary conditions
|
||||
template<typename vobj>
|
||||
struct CshiftImplBase{
|
||||
virtual Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const = 0;
|
||||
virtual ~CshiftImplBase(){}
|
||||
};
|
||||
template<typename vobj>
|
||||
struct CshiftImplDefault: public CshiftImplBase<vobj>{
|
||||
Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const override{ return Grid::Cshift(in,dir,shift); }
|
||||
};
|
||||
template<typename Gimpl>
|
||||
struct CshiftImplGauge: public CshiftImplBase<typename Gimpl::GaugeLinkField::vector_object>{
|
||||
typename Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override{ return Gimpl::CshiftLink(in,dir,shift); }
|
||||
};
|
||||
|
||||
class PaddedCell {
|
||||
public:
|
||||
GridCartesian * unpadded_grid;
|
||||
int dims;
|
||||
int depth;
|
||||
std::vector<GridCartesian *> grids;
|
||||
|
||||
~PaddedCell()
|
||||
{
|
||||
DeleteGrids();
|
||||
@ -77,7 +95,7 @@ public:
|
||||
}
|
||||
};
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> Extract(Lattice<vobj> &in)
|
||||
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
|
||||
{
|
||||
Lattice<vobj> out(unpadded_grid);
|
||||
|
||||
@ -88,19 +106,19 @@ public:
|
||||
return out;
|
||||
}
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> Exchange(Lattice<vobj> &in)
|
||||
inline Lattice<vobj> Exchange(const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
||||
{
|
||||
GridBase *old_grid = in.Grid();
|
||||
int dims = old_grid->Nd();
|
||||
Lattice<vobj> tmp = in;
|
||||
for(int d=0;d<dims;d++){
|
||||
tmp = Expand(d,tmp); // rvalue && assignment
|
||||
tmp = Expand(d,tmp,cshift); // rvalue && assignment
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
// expand up one dim at a time
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> Expand(int dim,Lattice<vobj> &in)
|
||||
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
||||
{
|
||||
GridBase *old_grid = in.Grid();
|
||||
GridCartesian *new_grid = grids[dim];//These are new grids
|
||||
@ -112,20 +130,40 @@ public:
|
||||
else conformable(old_grid,grids[dim-1]);
|
||||
|
||||
std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
|
||||
|
||||
double tins=0, tshift=0;
|
||||
|
||||
// Middle bit
|
||||
double t = usecond();
|
||||
for(int x=0;x<local[dim];x++){
|
||||
InsertSliceLocal(in,padded,x,depth+x,dim);
|
||||
}
|
||||
tins += usecond() - t;
|
||||
|
||||
// High bit
|
||||
shifted = Cshift(in,dim,depth);
|
||||
t = usecond();
|
||||
shifted = cshift.Cshift(in,dim,depth);
|
||||
tshift += usecond() - t;
|
||||
|
||||
t=usecond();
|
||||
for(int x=0;x<depth;x++){
|
||||
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
|
||||
}
|
||||
tins += usecond() - t;
|
||||
|
||||
// Low bit
|
||||
shifted = Cshift(in,dim,-depth);
|
||||
t = usecond();
|
||||
shifted = cshift.Cshift(in,dim,-depth);
|
||||
tshift += usecond() - t;
|
||||
|
||||
t = usecond();
|
||||
for(int x=0;x<depth;x++){
|
||||
InsertSliceLocal(shifted,padded,x,x,dim);
|
||||
}
|
||||
tins += usecond() - t;
|
||||
|
||||
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
|
||||
|
||||
return padded;
|
||||
}
|
||||
|
||||
|
@ -176,7 +176,7 @@ public:
|
||||
return PeriodicBC::CshiftLink(Link,mu,shift);
|
||||
}
|
||||
|
||||
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
|
||||
static inline void setDirections(const std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
|
||||
static inline std::vector<int> getDirections(void) { return _conjDirs; }
|
||||
static inline bool isPeriodicGaugeField(void) { return false; }
|
||||
};
|
||||
|
@ -43,7 +43,7 @@ public:
|
||||
private:
|
||||
RealD c_plaq;
|
||||
RealD c_rect;
|
||||
|
||||
typename WilsonLoops<Gimpl>::StapleAndRectStapleAllWorkspace workspace;
|
||||
public:
|
||||
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
|
||||
|
||||
@ -79,27 +79,18 @@ public:
|
||||
GridBase *grid = Umu.Grid();
|
||||
|
||||
std::vector<GaugeLinkField> U (Nd,grid);
|
||||
std::vector<GaugeLinkField> U2(Nd,grid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
|
||||
}
|
||||
std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid);
|
||||
WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, U, workspace);
|
||||
|
||||
GaugeLinkField dSdU_mu(grid);
|
||||
GaugeLinkField staple(grid);
|
||||
|
||||
for (int mu=0; mu < Nd; mu++){
|
||||
|
||||
// Staple in direction mu
|
||||
|
||||
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
|
||||
|
||||
dSdU_mu = Ta(U[mu]*staple)*factor_p;
|
||||
|
||||
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
|
||||
|
||||
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
|
||||
dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p;
|
||||
dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r;
|
||||
|
||||
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
||||
}
|
||||
|
@ -37,13 +37,14 @@ NAMESPACE_BEGIN(Grid);
|
||||
// Make these members of an Impl class for BC's.
|
||||
|
||||
namespace PeriodicBC {
|
||||
|
||||
//Out(x) = Link(x)*field(x+mu)
|
||||
template<class covariant,class gauge> Lattice<covariant> CovShiftForward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const Lattice<covariant> &field)
|
||||
{
|
||||
return Link*Cshift(field,mu,1);// moves towards negative mu
|
||||
}
|
||||
//Out(x) = Link^dag(x-mu)*field(x-mu)
|
||||
template<class covariant,class gauge> Lattice<covariant> CovShiftBackward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const Lattice<covariant> &field)
|
||||
@ -52,19 +53,19 @@ namespace PeriodicBC {
|
||||
tmp = adj(Link)*field;
|
||||
return Cshift(tmp,mu,-1);// moves towards positive mu
|
||||
}
|
||||
|
||||
//Out(x) = Link^dag(x-mu)
|
||||
template<class gauge> Lattice<gauge>
|
||||
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
|
||||
{
|
||||
return Cshift(adj(Link), mu, -1);
|
||||
}
|
||||
|
||||
//Out(x) = Link(x)
|
||||
template<class gauge> Lattice<gauge>
|
||||
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
|
||||
{
|
||||
return Link;
|
||||
}
|
||||
|
||||
//Link(x) = Link(x+mu)
|
||||
template<class gauge> Lattice<gauge>
|
||||
ShiftStaple(const Lattice<gauge> &Link, int mu)
|
||||
{
|
||||
|
@ -290,7 +290,7 @@ public:
|
||||
}
|
||||
*/
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site
|
||||
// the sum over all nu-oriented staples for nu != mu on each site
|
||||
//////////////////////////////////////////////////
|
||||
static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
|
||||
|
||||
@ -300,6 +300,10 @@ public:
|
||||
for (int d = 0; d < Nd; d++) {
|
||||
U[d] = PeekIndex<LorentzIndex>(Umu, d);
|
||||
}
|
||||
Staple(staple, U, mu);
|
||||
}
|
||||
|
||||
static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &U, int mu) {
|
||||
staple = Zero();
|
||||
|
||||
for (int nu = 0; nu < Nd; nu++) {
|
||||
@ -335,6 +339,202 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/////////////
|
||||
//Staples for each direction mu, summed over nu != mu
|
||||
//staple: output staples for each mu (Nd)
|
||||
//U: link array (Nd)
|
||||
/////////////
|
||||
static void StapleAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U) {
|
||||
assert(staple.size() == Nd); assert(U.size() == Nd);
|
||||
for(int mu=0;mu<Nd;mu++) Staple(staple[mu], U, mu);
|
||||
}
|
||||
|
||||
|
||||
//A workspace class allowing reuse of the stencil
|
||||
class WilsonLoopPaddedStencilWorkspace{
|
||||
std::unique_ptr<GeneralLocalStencil> stencil;
|
||||
size_t nshift;
|
||||
|
||||
void generateStencil(GridBase* padded_grid){
|
||||
double t0 = usecond();
|
||||
|
||||
//Generate shift arrays
|
||||
std::vector<Coordinate> shifts = this->getShifts();
|
||||
nshift = shifts.size();
|
||||
|
||||
double t1 = usecond();
|
||||
//Generate local stencil
|
||||
stencil.reset(new GeneralLocalStencil(padded_grid,shifts));
|
||||
double t2 = usecond();
|
||||
std::cout << GridLogPerformance << " WilsonLoopPaddedWorkspace timings: coord:" << (t1-t0)/1000 << "ms, stencil:" << (t2-t1)/1000 << "ms" << std::endl;
|
||||
}
|
||||
public:
|
||||
//Get the stencil. If not already generated, or if generated using a different Grid than in PaddedCell, it will be created on-the-fly
|
||||
const GeneralLocalStencil & getStencil(const PaddedCell &pcell){
|
||||
assert(pcell.depth >= this->paddingDepth());
|
||||
if(!stencil || stencil->Grid() != (GridBase*)pcell.grids.back() ) generateStencil((GridBase*)pcell.grids.back());
|
||||
return *stencil;
|
||||
}
|
||||
size_t Nshift() const{ return nshift; }
|
||||
|
||||
virtual std::vector<Coordinate> getShifts() const = 0;
|
||||
virtual int paddingDepth() const = 0; //padding depth required
|
||||
|
||||
virtual ~WilsonLoopPaddedStencilWorkspace(){}
|
||||
};
|
||||
|
||||
//This workspace allows the sharing of a common PaddedCell object between multiple stencil workspaces
|
||||
class WilsonLoopPaddedWorkspace{
|
||||
std::vector<WilsonLoopPaddedStencilWorkspace*> stencil_wk;
|
||||
std::unique_ptr<PaddedCell> pcell;
|
||||
|
||||
void generatePcell(GridBase* unpadded_grid){
|
||||
assert(stencil_wk.size());
|
||||
int max_depth = 0;
|
||||
for(auto const &s : stencil_wk) max_depth=std::max(max_depth, s->paddingDepth());
|
||||
|
||||
pcell.reset(new PaddedCell(max_depth, dynamic_cast<GridCartesian*>(unpadded_grid)));
|
||||
}
|
||||
|
||||
public:
|
||||
//Add a stencil definition. This should be done before the first call to retrieve a stencil object.
|
||||
//Takes ownership of the pointer
|
||||
void addStencil(WilsonLoopPaddedStencilWorkspace *stencil){
|
||||
assert(!pcell);
|
||||
stencil_wk.push_back(stencil);
|
||||
}
|
||||
|
||||
const GeneralLocalStencil & getStencil(const size_t stencil_idx, GridBase* unpadded_grid){
|
||||
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
|
||||
return stencil_wk[stencil_idx]->getStencil(*pcell);
|
||||
}
|
||||
const PaddedCell & getPaddedCell(GridBase* unpadded_grid){
|
||||
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
|
||||
return *pcell;
|
||||
}
|
||||
|
||||
~WilsonLoopPaddedWorkspace(){
|
||||
for(auto &s : stencil_wk) delete s;
|
||||
}
|
||||
};
|
||||
|
||||
//A workspace class allowing reuse of the stencil
|
||||
class StaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
|
||||
public:
|
||||
std::vector<Coordinate> getShifts() const override{
|
||||
std::vector<Coordinate> shifts;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if(nu != mu){
|
||||
Coordinate shift_0(Nd,0);
|
||||
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
|
||||
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
|
||||
Coordinate shift_mnu(Nd,0); shift_mnu[nu]=-1;
|
||||
Coordinate shift_mnu_pmu(Nd,0); shift_mnu_pmu[nu]=-1; shift_mnu_pmu[mu]=1;
|
||||
|
||||
//U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
|
||||
shifts.push_back(shift_0);
|
||||
shifts.push_back(shift_nu);
|
||||
shifts.push_back(shift_mu);
|
||||
|
||||
//U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
|
||||
shifts.push_back(shift_mnu);
|
||||
shifts.push_back(shift_mnu);
|
||||
shifts.push_back(shift_mnu_pmu);
|
||||
}
|
||||
}
|
||||
}
|
||||
return shifts;
|
||||
}
|
||||
|
||||
int paddingDepth() const override{ return 1; }
|
||||
};
|
||||
|
||||
//Padded cell implementation of the staple method for all mu, summed over nu != mu
|
||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
||||
//Cell: the padded cell class
|
||||
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
|
||||
StaplePaddedAllWorkspace wk;
|
||||
StaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
|
||||
}
|
||||
|
||||
//Padded cell implementation of the staple method for all mu, summed over nu != mu
|
||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
||||
//Cell: the padded cell class
|
||||
//gStencil: the precomputed generalized local stencil for the staple
|
||||
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) {
|
||||
double t0 = usecond();
|
||||
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
|
||||
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
|
||||
assert(Cell.depth >= 1);
|
||||
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
|
||||
|
||||
int shift_mu_off = gStencil._npoints/Nd;
|
||||
|
||||
//Open views to padded gauge links and keep open over mu loop
|
||||
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
|
||||
size_t vsize = Nd*sizeof(GaugeViewType);
|
||||
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
|
||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
|
||||
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
|
||||
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
|
||||
|
||||
GaugeMat gStaple(ggrid);
|
||||
|
||||
int outer_off = 0;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
{ //view scope
|
||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
||||
auto gStencil_v = gStencil.View();
|
||||
|
||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
||||
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
|
||||
stencil_ss = Zero();
|
||||
int off = outer_off;
|
||||
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if(nu != mu){
|
||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(off++,ss);
|
||||
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(off++,ss);
|
||||
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(off++,ss);
|
||||
auto U2 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U2 * U1 * U0;
|
||||
|
||||
e = gStencil_v.GetEntry(off++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(off++,ss);
|
||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(off++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
|
||||
stencil_ss = stencil_ss + U2 * U1 * U0;
|
||||
}
|
||||
}
|
||||
|
||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
||||
}
|
||||
);
|
||||
} //ensure views are all closed!
|
||||
|
||||
staple[mu] = Cell.Extract(gStaple);
|
||||
outer_off += shift_mu_off;
|
||||
}//mu loop
|
||||
|
||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
|
||||
free(Ug_dirs_v_host);
|
||||
acceleratorFreeDevice(Ug_dirs_v);
|
||||
|
||||
double t1=usecond();
|
||||
|
||||
std::cout << GridLogPerformance << "StaplePaddedAll timing:" << (t1-t0)/1000 << "ms" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site in direction mu,nu, upper part
|
||||
//////////////////////////////////////////////////
|
||||
@ -707,18 +907,14 @@ public:
|
||||
// the sum over all staples on each site
|
||||
//////////////////////////////////////////////////
|
||||
static void RectStapleDouble(GaugeMat &U2, const GaugeMat &U, int mu) {
|
||||
U2 = U * Cshift(U, mu, 1);
|
||||
U2 = U * Gimpl::CshiftLink(U, mu, 1);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Hop by two optimisation strategy does not work nicely with Gparity. (could
|
||||
// do,
|
||||
// but need to track two deep where cross boundary and apply a conjugation).
|
||||
// Must differentiate this in Gimpl, and use Gimpl::isPeriodicGaugeField to do
|
||||
// so .
|
||||
// Hop by two optimisation strategy. Use RectStapleDouble to obtain 'U2'
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
static void RectStapleOptimised(GaugeMat &Stap, std::vector<GaugeMat> &U2,
|
||||
std::vector<GaugeMat> &U, int mu) {
|
||||
static void RectStapleOptimised(GaugeMat &Stap, const std::vector<GaugeMat> &U2,
|
||||
const std::vector<GaugeMat> &U, int mu) {
|
||||
|
||||
Stap = Zero();
|
||||
|
||||
@ -732,9 +928,9 @@ public:
|
||||
|
||||
// Up staple ___ ___
|
||||
// | |
|
||||
tmp = Cshift(adj(U[nu]), nu, -1);
|
||||
tmp = Gimpl::CshiftLink(adj(U[nu]), nu, -1);
|
||||
tmp = adj(U2[mu]) * tmp;
|
||||
tmp = Cshift(tmp, mu, -2);
|
||||
tmp = Gimpl::CshiftLink(tmp, mu, -2);
|
||||
|
||||
Staple2x1 = Gimpl::CovShiftForward(U[nu], nu, tmp);
|
||||
|
||||
@ -742,14 +938,14 @@ public:
|
||||
// |___ ___|
|
||||
//
|
||||
tmp = adj(U2[mu]) * U[nu];
|
||||
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Cshift(tmp, mu, -2));
|
||||
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CshiftLink(tmp, mu, -2));
|
||||
|
||||
// ___ ___
|
||||
// | ___|
|
||||
// |___ ___|
|
||||
//
|
||||
|
||||
Stap += Cshift(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
|
||||
Stap += Gimpl::CshiftLink(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
|
||||
|
||||
// ___ ___
|
||||
// |___ |
|
||||
@ -758,7 +954,7 @@ public:
|
||||
|
||||
// tmp= Staple2x1* Cshift(U[mu],mu,-2);
|
||||
// Stap+= Cshift(tmp,mu,1) ;
|
||||
Stap += Cshift(Staple2x1, mu, 1) * Cshift(U[mu], mu, -1);
|
||||
Stap += Gimpl::CshiftLink(Staple2x1, mu, 1) * Gimpl::CshiftLink(U[mu], mu, -1);
|
||||
;
|
||||
|
||||
// --
|
||||
@ -766,10 +962,10 @@ public:
|
||||
//
|
||||
// | |
|
||||
|
||||
tmp = Cshift(adj(U2[nu]), nu, -2);
|
||||
tmp = Gimpl::CshiftLink(adj(U2[nu]), nu, -2);
|
||||
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
|
||||
tmp = U2[nu] * Cshift(tmp, nu, 2);
|
||||
Stap += Cshift(tmp, mu, 1);
|
||||
tmp = U2[nu] * Gimpl::CshiftLink(tmp, nu, 2);
|
||||
Stap += Gimpl::CshiftLink(tmp, mu, 1);
|
||||
|
||||
// | |
|
||||
//
|
||||
@ -778,25 +974,12 @@ public:
|
||||
|
||||
tmp = Gimpl::CovShiftBackward(U[mu], mu, U2[nu]);
|
||||
tmp = adj(U2[nu]) * tmp;
|
||||
tmp = Cshift(tmp, nu, -2);
|
||||
Stap += Cshift(tmp, mu, 1);
|
||||
tmp = Gimpl::CshiftLink(tmp, nu, -2);
|
||||
Stap += Gimpl::CshiftLink(tmp, mu, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
|
||||
RectStapleUnoptimised(Stap, Umu, mu);
|
||||
}
|
||||
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
|
||||
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
|
||||
int mu) {
|
||||
if (Gimpl::isPeriodicGaugeField()) {
|
||||
RectStapleOptimised(Stap, U2, U, mu);
|
||||
} else {
|
||||
RectStapleUnoptimised(Stap, Umu, mu);
|
||||
}
|
||||
}
|
||||
|
||||
static void RectStapleUnoptimised(GaugeMat &Stap, const GaugeLorentz &Umu,
|
||||
int mu) {
|
||||
GridBase *grid = Umu.Grid();
|
||||
@ -895,6 +1078,288 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
|
||||
RectStapleUnoptimised(Stap, Umu, mu);
|
||||
}
|
||||
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
|
||||
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
|
||||
int mu) {
|
||||
RectStapleOptimised(Stap, U2, U, mu);
|
||||
}
|
||||
//////////////////////////////////////////////////////
|
||||
//Compute the rectangular staples for all orientations
|
||||
//Stap : Array of staples (Nd)
|
||||
//U: Gauge links in each direction (Nd)
|
||||
/////////////////////////////////////////////////////
|
||||
static void RectStapleAll(std::vector<GaugeMat> &Stap, const std::vector<GaugeMat> &U){
|
||||
assert(Stap.size() == Nd); assert(U.size() == Nd);
|
||||
std::vector<GaugeMat> U2(Nd,U[0].Grid());
|
||||
for(int mu=0;mu<Nd;mu++) RectStapleDouble(U2[mu], U[mu], mu);
|
||||
for(int mu=0;mu<Nd;mu++) RectStapleOptimised(Stap[mu], U2, U, mu);
|
||||
}
|
||||
|
||||
//A workspace class allowing reuse of the stencil
|
||||
class RectStaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
|
||||
public:
|
||||
std::vector<Coordinate> getShifts() const override{
|
||||
std::vector<Coordinate> shifts;
|
||||
for (int mu = 0; mu < Nd; mu++){
|
||||
for (int nu = 0; nu < Nd; nu++) {
|
||||
if (nu != mu) {
|
||||
auto genShift = [&](int mushift,int nushift){
|
||||
Coordinate out(Nd,0); out[mu]=mushift; out[nu]=nushift; return out;
|
||||
};
|
||||
|
||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
||||
shifts.push_back(genShift(0,0));
|
||||
shifts.push_back(genShift(0,+1));
|
||||
shifts.push_back(genShift(+1,+1));
|
||||
shifts.push_back(genShift(+2,0));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(+1,-1));
|
||||
shifts.push_back(genShift(+2,-1));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
||||
shifts.push_back(genShift(-1,0));
|
||||
shifts.push_back(genShift(-1,-1));
|
||||
shifts.push_back(genShift(-1,-1));
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(+1,-1));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
||||
shifts.push_back(genShift(-1,0));
|
||||
shifts.push_back(genShift(-1,0));
|
||||
shifts.push_back(genShift(-1,+1));
|
||||
shifts.push_back(genShift(0,+1));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
||||
shifts.push_back(genShift(0,0));
|
||||
shifts.push_back(genShift(0,+1));
|
||||
shifts.push_back(genShift(0,+2));
|
||||
shifts.push_back(genShift(+1,+1));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(0,-2));
|
||||
shifts.push_back(genShift(0,-2));
|
||||
shifts.push_back(genShift(+1,-2));
|
||||
shifts.push_back(genShift(+1,-1));
|
||||
}
|
||||
}
|
||||
}
|
||||
return shifts;
|
||||
}
|
||||
|
||||
int paddingDepth() const override{ return 2; }
|
||||
};
|
||||
|
||||
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
|
||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
||||
//Cell: the padded cell class
|
||||
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
|
||||
RectStaplePaddedAllWorkspace wk;
|
||||
RectStaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
|
||||
}
|
||||
|
||||
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
|
||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
||||
//Cell: the padded cell class
|
||||
//gStencil: the stencil
|
||||
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) {
|
||||
double t0 = usecond();
|
||||
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
|
||||
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
|
||||
assert(Cell.depth >= 2);
|
||||
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
|
||||
|
||||
size_t nshift = gStencil._npoints;
|
||||
int mu_off_delta = nshift / Nd;
|
||||
|
||||
//Open views to padded gauge links and keep open over mu loop
|
||||
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
|
||||
size_t vsize = Nd*sizeof(GaugeViewType);
|
||||
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
|
||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
|
||||
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
|
||||
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
|
||||
|
||||
GaugeMat gStaple(ggrid); //temp staple object on padded grid
|
||||
|
||||
int offset = 0;
|
||||
for(int mu=0; mu<Nd; mu++){
|
||||
|
||||
{ //view scope
|
||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
||||
auto gStencil_v = gStencil.View();
|
||||
|
||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
||||
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
|
||||
stencil_ss = Zero();
|
||||
int s=offset;
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if(nu != mu){
|
||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
}
|
||||
}
|
||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
||||
}
|
||||
);
|
||||
offset += mu_off_delta;
|
||||
}//kernel/view scope
|
||||
|
||||
staple[mu] = Cell.Extract(gStaple);
|
||||
}//mu loop
|
||||
|
||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
|
||||
free(Ug_dirs_v_host);
|
||||
acceleratorFreeDevice(Ug_dirs_v);
|
||||
|
||||
double t1 = usecond();
|
||||
|
||||
std::cout << GridLogPerformance << "RectStaplePaddedAll timings:" << (t1-t0)/1000 << "ms" << std::endl;
|
||||
}
|
||||
|
||||
//A workspace for reusing the PaddedCell and GeneralLocalStencil objects
|
||||
class StapleAndRectStapleAllWorkspace: public WilsonLoopPaddedWorkspace{
|
||||
public:
|
||||
StapleAndRectStapleAllWorkspace(){
|
||||
this->addStencil(new StaplePaddedAllWorkspace);
|
||||
this->addStencil(new RectStaplePaddedAllWorkspace);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
//Compute the 1x1 and 1x2 staples for all orientations
|
||||
//Stap : Array of staples (Nd)
|
||||
//RectStap: Array of rectangular staples (Nd)
|
||||
//U: Gauge links in each direction (Nd)
|
||||
/////////////////////////////////////////////////////
|
||||
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U){
|
||||
StapleAndRectStapleAllWorkspace wk;
|
||||
StapleAndRectStapleAll(Stap,RectStap,U,wk);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
//Compute the 1x1 and 1x2 staples for all orientations
|
||||
//Stap : Array of staples (Nd)
|
||||
//RectStap: Array of rectangular staples (Nd)
|
||||
//U: Gauge links in each direction (Nd)
|
||||
//wk: a workspace containing stored PaddedCell and GeneralLocalStencil objects to maximize reuse
|
||||
/////////////////////////////////////////////////////
|
||||
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U, StapleAndRectStapleAllWorkspace &wk){
|
||||
#if 0
|
||||
StapleAll(Stap, U);
|
||||
RectStapleAll(RectStap, U);
|
||||
#else
|
||||
double t0 = usecond();
|
||||
|
||||
GridCartesian* unpadded_grid = dynamic_cast<GridCartesian*>(U[0].Grid());
|
||||
const PaddedCell &Ghost = wk.getPaddedCell(unpadded_grid);
|
||||
|
||||
CshiftImplGauge<Gimpl> cshift_impl;
|
||||
std::vector<GaugeMat> U_pad(Nd, Ghost.grids.back());
|
||||
for(int mu=0;mu<Nd;mu++) U_pad[mu] = Ghost.Exchange(U[mu], cshift_impl);
|
||||
double t1 = usecond();
|
||||
StaplePaddedAll(Stap, U_pad, Ghost, wk.getStencil(0,unpadded_grid) );
|
||||
double t2 = usecond();
|
||||
RectStaplePaddedAll(RectStap, U_pad, Ghost, wk.getStencil(1,unpadded_grid));
|
||||
double t3 = usecond();
|
||||
std::cout << GridLogPerformance << "StapleAndRectStapleAll timings: pad:" << (t1-t0)/1000 << "ms, staple:" << (t2-t1)/1000 << "ms, rect-staple:" << (t3-t2)/1000 << "ms" << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Wilson loop of size (R1, R2), oriented in mu,nu plane
|
||||
//////////////////////////////////////////////////
|
||||
|
@ -79,10 +79,10 @@ public:
|
||||
this->_entries.resize(npoints* osites);
|
||||
this->_entries_p = &_entries[0];
|
||||
|
||||
|
||||
thread_for(site, osites, {
|
||||
Coordinate Coor;
|
||||
Coordinate NbrCoor;
|
||||
for(Integer site=0;site<osites;site++){
|
||||
|
||||
for(Integer ii=0;ii<npoints;ii++){
|
||||
Integer lex = site*npoints+ii;
|
||||
GeneralStencilEntry SE;
|
||||
@ -132,7 +132,7 @@ public:
|
||||
////////////////////////////////////////////////
|
||||
this->_entries[lex] = SE;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -32,6 +32,7 @@
|
||||
|
||||
#include <Grid/stencil/SimpleCompressor.h> // subdir aggregate
|
||||
#include <Grid/stencil/Lebesgue.h> // subdir aggregate
|
||||
#include <Grid/stencil/GeneralLocalStencil.h>
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Must not lose sight that goal is to be able to construct really efficient
|
||||
|
@ -73,6 +73,16 @@ vobj coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int
|
||||
return vec;
|
||||
}
|
||||
}
|
||||
//'perm_mask' acts as a bitmask
|
||||
template<class vobj> accelerator_inline
|
||||
vobj coalescedReadGeneralPermute(const vobj & __restrict__ vec,int perm_mask,int nd,int lane=0)
|
||||
{
|
||||
auto obj = vec, tmp = vec;
|
||||
for (int d=0;d<nd;d++)
|
||||
if (perm_mask & (0x1 << d)) { permute(obj,tmp,d); tmp=obj;}
|
||||
return obj;
|
||||
}
|
||||
|
||||
template<class vobj> accelerator_inline
|
||||
void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
|
||||
{
|
||||
@ -83,7 +93,7 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
|
||||
{
|
||||
vstream(vec, extracted);
|
||||
}
|
||||
#else
|
||||
#else //==GRID_SIMT
|
||||
|
||||
|
||||
//#ifndef GRID_SYCL
|
||||
@ -166,6 +176,14 @@ typename vobj::scalar_object coalescedReadPermute(const vobj & __restrict__ vec,
|
||||
return extractLane(plane,vec);
|
||||
}
|
||||
template<class vobj> accelerator_inline
|
||||
typename vobj::scalar_object coalescedReadGeneralPermute(const vobj & __restrict__ vec,int perm_mask,int nd,int lane=acceleratorSIMTlane(vobj::Nsimd()))
|
||||
{
|
||||
int plane = lane;
|
||||
for (int d=0;d<nd;d++)
|
||||
plane = (perm_mask & (0x1 << d)) ? plane ^ (vobj::Nsimd() >> (d + 1)) : plane;
|
||||
return extractLane(plane,vec);
|
||||
}
|
||||
template<class vobj> accelerator_inline
|
||||
void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=acceleratorSIMTlane(vobj::Nsimd()))
|
||||
{
|
||||
insertLane(lane,vec,extracted);
|
||||
|
188
tests/debug/Test_iwasaki_action_newstaple.cc
Normal file
188
tests/debug/Test_iwasaki_action_newstaple.cc
Normal file
@ -0,0 +1,188 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_iwasaki_action_newstaple.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// PlaqPlusRectangleActoin
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
template<class Gimpl>
|
||||
class PlaqPlusRectangleActionOrig : public Action<typename Gimpl::GaugeField> {
|
||||
public:
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
private:
|
||||
RealD c_plaq;
|
||||
RealD c_rect;
|
||||
|
||||
public:
|
||||
PlaqPlusRectangleActionOrig(RealD b,RealD c): c_plaq(b),c_rect(c){};
|
||||
|
||||
virtual std::string action_name(){return "PlaqPlusRectangleActionOrig";}
|
||||
|
||||
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
|
||||
|
||||
virtual std::string LogParameters(){
|
||||
std::stringstream sstream;
|
||||
sstream << GridLogMessage << "["<<action_name() <<"] c_plaq: " << c_plaq << std::endl;
|
||||
sstream << GridLogMessage << "["<<action_name() <<"] c_rect: " << c_rect << std::endl;
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
|
||||
virtual RealD S(const GaugeField &U) {
|
||||
RealD vol = U.Grid()->gSites();
|
||||
|
||||
RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
|
||||
RealD rect = WilsonLoops<Gimpl>::avgRectangle(U);
|
||||
|
||||
RealD action=c_plaq*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5
|
||||
+c_rect*(1.0 -rect)*(Nd*(Nd-1.0))*vol;
|
||||
|
||||
return action;
|
||||
};
|
||||
|
||||
virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) {
|
||||
//extend Ta to include Lorentz indexes
|
||||
RealD factor_p = c_plaq/RealD(Nc)*0.5;
|
||||
RealD factor_r = c_rect/RealD(Nc)*0.5;
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
|
||||
std::vector<GaugeLinkField> U (Nd,grid);
|
||||
std::vector<GaugeLinkField> U2(Nd,grid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
|
||||
}
|
||||
|
||||
GaugeLinkField dSdU_mu(grid);
|
||||
GaugeLinkField staple(grid);
|
||||
|
||||
for (int mu=0; mu < Nd; mu++){
|
||||
|
||||
// Staple in direction mu
|
||||
|
||||
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
|
||||
|
||||
dSdU_mu = Ta(U[mu]*staple)*factor_p;
|
||||
|
||||
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
|
||||
|
||||
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
|
||||
|
||||
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
// Convenience for common physically defined cases.
|
||||
//
|
||||
// RBC c1 parameterisation is not really RBC but don't have good
|
||||
// reference and we are happy to change name if prior use of this plaq coeff
|
||||
// parameterisation is made known to us.
|
||||
template<class Gimpl>
|
||||
class RBCGaugeActionOrig : public PlaqPlusRectangleActionOrig<Gimpl> {
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
RBCGaugeActionOrig(RealD beta,RealD c1) : PlaqPlusRectangleActionOrig<Gimpl>(beta*(1.0-8.0*c1), beta*c1) {};
|
||||
virtual std::string action_name(){return "RBCGaugeActionOrig";}
|
||||
};
|
||||
|
||||
template<class Gimpl>
|
||||
class IwasakiGaugeActionOrig : public RBCGaugeActionOrig<Gimpl> {
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
IwasakiGaugeActionOrig(RealD beta) : RBCGaugeActionOrig<Gimpl>(beta,-0.331) {};
|
||||
virtual std::string action_name(){return "IwasakiGaugeActionOrig";}
|
||||
};
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
Coordinate latt_size = GridDefaultLatt();
|
||||
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
std::cout << " mpi "<<mpi_layout<<std::endl;
|
||||
std::cout << " simd "<<simd_layout<<std::endl;
|
||||
std::cout << " latt "<<latt_size<<std::endl;
|
||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&GRID);
|
||||
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
LatticeGaugeField U(&GRID);
|
||||
|
||||
SU<Nc>::HotConfiguration(pRNG,U);
|
||||
|
||||
//#define PRD
|
||||
#ifdef PRD
|
||||
typedef PeriodicGimplD Gimpl;
|
||||
#else
|
||||
typedef ConjugateGimplD Gimpl;
|
||||
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
|
||||
Gimpl::setDirections(conj_dirs);
|
||||
#endif
|
||||
|
||||
typedef typename WilsonLoops<Gimpl>::GaugeMat GaugeMat;
|
||||
typedef typename WilsonLoops<Gimpl>::GaugeLorentz GaugeLorentz;
|
||||
|
||||
GaugeLorentz derivOrig(&GRID), derivNew(&GRID);
|
||||
double beta = 2.13;
|
||||
IwasakiGaugeActionOrig<Gimpl> action_orig(beta);
|
||||
IwasakiGaugeAction<Gimpl> action_new(beta);
|
||||
|
||||
double torig=0, tnew=0;
|
||||
int ntest = 10;
|
||||
for(int i=0;i<ntest;i++){
|
||||
double t0 = usecond();
|
||||
action_orig.deriv(U, derivOrig);
|
||||
double t1 = usecond();
|
||||
action_new.deriv(U, derivNew);
|
||||
double t2 = usecond();
|
||||
|
||||
GaugeLorentz diff = derivOrig - derivNew;
|
||||
double n = norm2(diff);
|
||||
std::cout << GridLogMessage << "Difference " << n << " (expect 0)" << std::endl;
|
||||
assert(n<1e-10);
|
||||
|
||||
std::cout << GridLogMessage << "Timings orig: " << (t1-t0)/1000 << "ms, new: " << (t2-t1)/1000 << "ms" << std::endl;
|
||||
torig += (t1-t0)/1000; tnew += (t2-t1)/1000;
|
||||
}
|
||||
std::cout << GridLogMessage << "Avg timings " << ntest << " iterations: orig:" << torig/ntest << "ms, new:" << tnew/ntest << "ms" << std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
94
tests/debug/Test_optimized_staple_gaugebc.cc
Normal file
94
tests/debug/Test_optimized_staple_gaugebc.cc
Normal file
@ -0,0 +1,94 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_optimized_staple_gaugebc.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/lattice/PaddedCell.h>
|
||||
#include <Grid/stencil/GeneralLocalStencil.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
Coordinate latt_size = GridDefaultLatt();
|
||||
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
std::cout << " mpi "<<mpi_layout<<std::endl;
|
||||
std::cout << " simd "<<simd_layout<<std::endl;
|
||||
std::cout << " latt "<<latt_size<<std::endl;
|
||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&GRID);
|
||||
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
LatticeGaugeField U(&GRID);
|
||||
|
||||
SU<Nc>::HotConfiguration(pRNG,U);
|
||||
|
||||
//#define PRD
|
||||
#ifdef PRD
|
||||
typedef PeriodicGimplD Gimpl;
|
||||
#else
|
||||
typedef ConjugateGimplD Gimpl;
|
||||
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
|
||||
Gimpl::setDirections(conj_dirs);
|
||||
#endif
|
||||
|
||||
typedef typename WilsonLoops<Gimpl>::GaugeMat GaugeMat;
|
||||
typedef typename WilsonLoops<Gimpl>::GaugeLorentz GaugeLorentz;
|
||||
|
||||
int count = 0;
|
||||
double torig=0, topt=0;
|
||||
|
||||
std::vector<GaugeMat> Umu(Nd,&GRID), U2(Nd,&GRID);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
Umu[mu] = PeekIndex<LorentzIndex>(U,mu);
|
||||
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu], Umu[mu], mu);
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Checking optimized vs unoptimized RectStaple" << std::endl;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
GaugeMat staple_orig(&GRID), staple_opt(&GRID), staple_U2(&GRID);
|
||||
double t0 = usecond();
|
||||
WilsonLoops<Gimpl>::RectStapleUnoptimised(staple_orig,U,mu);
|
||||
double t1 = usecond();
|
||||
WilsonLoops<Gimpl>::RectStapleOptimised(staple_opt, U2, Umu, mu);
|
||||
double t2 = usecond();
|
||||
torig += t1-t0; topt += t2-t1;
|
||||
++count;
|
||||
|
||||
GaugeMat diff = staple_orig - staple_opt;
|
||||
double n = norm2(diff);
|
||||
std::cout << GridLogMessage << mu << " " << n << std::endl;
|
||||
assert(n<1e-10);
|
||||
}
|
||||
std::cout << GridLogMessage << "RectStaple timings orig: " << torig/1000/count << "ms, optimized: " << topt/1000/count << "ms" << std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
580
tests/debug/Test_padded_cell_staple.cc
Normal file
580
tests/debug/Test_padded_cell_staple.cc
Normal file
@ -0,0 +1,580 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_padded_cell_staple.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/lattice/PaddedCell.h>
|
||||
#include <Grid/stencil/GeneralLocalStencil.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
template <class Gimpl> class WilsonLoopsTest : public Gimpl {
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
typedef typename Gimpl::GaugeLinkField GaugeMat;
|
||||
typedef typename Gimpl::GaugeField GaugeLorentz;
|
||||
|
||||
|
||||
//Original implementation
|
||||
static void StapleOrig(GaugeMat &staple, const GaugeLorentz &Umu, int mu,
|
||||
int nu) {
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
|
||||
std::vector<GaugeMat> U(Nd, grid);
|
||||
for (int d = 0; d < Nd; d++) {
|
||||
U[d] = PeekIndex<LorentzIndex>(Umu, d);
|
||||
}
|
||||
staple = Zero();
|
||||
|
||||
if (nu != mu) {
|
||||
|
||||
// mu
|
||||
// ^
|
||||
// |__> nu
|
||||
|
||||
// __
|
||||
// |
|
||||
// __|
|
||||
//
|
||||
|
||||
//Forward: Out(x) = Link(x)*field(x+mu)
|
||||
//Backward: Out(x) = Link^dag(x-mu)*field(x-mu)
|
||||
//ShiftStaple: Link(x) = Link(x+mu)
|
||||
|
||||
//tmp1 = U^dag_nu(x-nu)
|
||||
//tmp2 = U^dag_mu(x-mu) tmp1(x-mu) = U^dag_mu(x-mu) U^dag_nu(x-nu-mu)
|
||||
//tmp3 = U_nu(x) tmp2(x+nu) = U_nu(x)U^dag_mu(x-mu+nu) U^dag_nu(x-mu)
|
||||
//tmp4 = tmp(x+mu) = U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
|
||||
|
||||
staple += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))),
|
||||
mu);
|
||||
|
||||
// __
|
||||
// |
|
||||
// |__
|
||||
//
|
||||
//
|
||||
|
||||
//tmp1 = U_mu^dag(x-mu) U_nu(x-mu)
|
||||
//tmp2 = U_nu^dag(x-nu) tmp1(x-nu) = U_nu^dag(x-nu) U_mu^dag(x-mu-nu) U_nu(x-mu-nu)
|
||||
//tmp3 = tmp2(x+mu) = U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
|
||||
staple += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(U[nu], nu,
|
||||
Gimpl::CovShiftBackward(U[mu], mu, U[nu])),
|
||||
mu);
|
||||
}
|
||||
}
|
||||
|
||||
static void StaplePadded(GaugeMat &staple, const GaugeLorentz &U, int mu,
|
||||
int nu) {
|
||||
if(nu==mu){
|
||||
staple = Zero();
|
||||
return;
|
||||
}
|
||||
double peek = 0, construct = 0, exchange = 0, coord = 0, stencil =0, kernel = 0, extract = 0, total = 0;
|
||||
|
||||
double tstart = usecond();
|
||||
double t=tstart;
|
||||
|
||||
PaddedCell Ghost(1, (GridCartesian*)U.Grid());
|
||||
|
||||
construct += usecond() - t;
|
||||
|
||||
t=usecond();
|
||||
GaugeMat U_mu = PeekIndex<LorentzIndex>(U, mu);
|
||||
GaugeMat U_nu = PeekIndex<LorentzIndex>(U, nu);
|
||||
peek += usecond() - t;
|
||||
|
||||
t=usecond();
|
||||
CshiftImplGauge<Gimpl> cshift_impl;
|
||||
GaugeMat Ug_mu = Ghost.Exchange(U_mu, cshift_impl);
|
||||
GaugeMat Ug_nu = Ghost.Exchange(U_nu, cshift_impl);
|
||||
exchange += usecond() - t;
|
||||
|
||||
GridBase *ggrid = Ug_mu.Grid();
|
||||
|
||||
GaugeMat gStaple(ggrid);
|
||||
|
||||
t=usecond();
|
||||
Coordinate shift_0(Nd,0);
|
||||
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
|
||||
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
|
||||
Coordinate shift_mnu(Nd,0); shift_mnu[nu]=-1;
|
||||
Coordinate shift_mnu_pmu(Nd,0); shift_mnu_pmu[nu]=-1; shift_mnu_pmu[mu]=1;
|
||||
|
||||
std::vector<Coordinate> shifts;
|
||||
|
||||
//U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
|
||||
shifts.push_back(shift_0);
|
||||
shifts.push_back(shift_nu);
|
||||
shifts.push_back(shift_mu);
|
||||
|
||||
//U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
|
||||
shifts.push_back(shift_mnu);
|
||||
shifts.push_back(shift_mnu);
|
||||
shifts.push_back(shift_mnu_pmu);
|
||||
coord += usecond()-t;
|
||||
|
||||
t=usecond();
|
||||
GeneralLocalStencil gStencil(ggrid,shifts);
|
||||
stencil += usecond() -t;
|
||||
|
||||
t=usecond();
|
||||
{
|
||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
||||
auto gStencil_v = gStencil.View();
|
||||
autoView( Ug_mu_v , Ug_mu, AcceleratorRead);
|
||||
autoView( Ug_nu_v , Ug_nu, AcceleratorRead);
|
||||
|
||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(0,ss);
|
||||
auto Udag_nu_x = adj(coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(1,ss);
|
||||
auto Udag_mu_xpnu = adj(coalescedReadGeneralPermute(Ug_mu_v[e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(2,ss);
|
||||
auto U_nu_xpmu = coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd);
|
||||
|
||||
auto stencil_ss = U_nu_xpmu * Udag_mu_xpnu * Udag_nu_x;
|
||||
|
||||
e = gStencil_v.GetEntry(3,ss);
|
||||
auto U_nu_xmnu = coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(4,ss);
|
||||
auto Udag_mu_xmnu = adj(coalescedReadGeneralPermute(Ug_mu_v[e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(5,ss);
|
||||
auto Udag_nu_xmnu_pmu = adj(coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd));
|
||||
|
||||
stencil_ss = stencil_ss + Udag_nu_xmnu_pmu * Udag_mu_xmnu * U_nu_xmnu;
|
||||
|
||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
||||
}
|
||||
);
|
||||
} //ensure views are all closed!
|
||||
kernel += usecond() - t;
|
||||
|
||||
t=usecond();
|
||||
staple = Ghost.Extract(gStaple);
|
||||
extract += usecond()-t;
|
||||
|
||||
total += usecond() - tstart;
|
||||
std::cout << GridLogMessage << "StaplePadded timings peek:" << peek << " construct:" << construct << " exchange:" << exchange << " coord:" << coord << " stencil:" << stencil << " kernel:" << kernel << " extract:" << extract << " total:" << total << std::endl;
|
||||
}
|
||||
|
||||
static void RectStapleOrig(GaugeMat &Stap, const GaugeLorentz &Umu,
|
||||
int mu) {
|
||||
GridBase *grid = Umu.Grid();
|
||||
|
||||
std::vector<GaugeMat> U(Nd, grid);
|
||||
for (int d = 0; d < Nd; d++) {
|
||||
U[d] = PeekIndex<LorentzIndex>(Umu, d);
|
||||
}
|
||||
|
||||
Stap = Zero();
|
||||
|
||||
for (int nu = 0; nu < Nd; nu++) {
|
||||
if (nu != mu) {
|
||||
// __ ___
|
||||
// | __ |
|
||||
//
|
||||
//tmp1 = U_nu^dag(x-nu)
|
||||
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu) U_nu^dag(x-nu-mu)
|
||||
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu) U_mu^dag(x-2mu) U_nu^dag(x-nu-2mu)
|
||||
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu) U_mu^dag(x-2mu+nu) U_nu^dag(x-2mu)
|
||||
//tmp5 = U_mu(x)tmp4(x+mu) = U_mu(x)U_nu(x+mu)U_mu^dag(x+nu) U_mu^dag(x-mu+nu) U_nu^dag(x-mu)
|
||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
||||
|
||||
Stap += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward(
|
||||
U[mu], mu,
|
||||
Gimpl::CovShiftForward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu], nu))))),
|
||||
mu);
|
||||
|
||||
// __
|
||||
// |__ __ |
|
||||
|
||||
//tmp1 = U^dag_mu(x-mu)U_nu(x-mu)
|
||||
//tmp2 = U^dag_mu(x-mu)tmp1(x-mu) = U^dag_mu(x-mu)U^dag_mu(x-2mu)U_nu(x-2mu)
|
||||
//tmp3 = U^dag_nu(x-nu)tmp2(x-nu) = U^dag_nu(x-nu)U^dag_mu(x-mu-nu)U^dag_mu(x-2mu-nu)U_nu(x-2mu-nu)
|
||||
//tmp4 = U_mu(x)tmp3(x+mu) = U_mu(x)U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)
|
||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
||||
|
||||
Stap += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward(
|
||||
U[mu], mu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu, Gimpl::CovShiftBackward(U[mu], mu, U[nu])))),
|
||||
mu);
|
||||
|
||||
// __
|
||||
// |__ __ |
|
||||
//Forward: Out(x) = Link(x)*field(x+mu)
|
||||
//Backward: Out(x) = Link^dag(x-mu)*field(x-mu)
|
||||
//ShiftStaple: Link(x) = Link(x+mu)
|
||||
|
||||
//tmp1 = U_nu(x)U_mu(x+nu)
|
||||
//tmp2 = U^dag_mu(x-mu)tmp1(x-mu) = U^dag_mu(x-mu)U_nu(x-mu)U_mu(x+nu-mu)
|
||||
//tmp3 = U^dag_mu(x-mu)tmp2(x-mu) = U^dag_mu(x-mu)U^dag_mu(x-2mu)U_nu(x-2mu)U_mu(x+nu-2mu)
|
||||
//tmp4 = U^dag_nu(x-nu)tmp3(x-nu) = U^dag_nu(x-nu)U^dag_mu(x-mu-nu)U^dag_mu(x-2mu-nu)U_nu(x-2mu-nu)U_mu(x-2mu)
|
||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
||||
Stap += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[mu])))),
|
||||
mu);
|
||||
|
||||
// __ ___
|
||||
// |__ |
|
||||
//tmp1 = U_nu^dag(x-nu)U_mu(x-nu)
|
||||
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu)U_nu^dag(x-mu-nu)U_mu(x-mu-nu)
|
||||
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu)U_mu^dag(x-2mu)U_nu^dag(x-2mu-nu)U_mu(x-2mu-nu)
|
||||
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu)U_mu^dag(x-2mu+nu)U_nu^dag(x-2mu)U_mu(x-2mu)
|
||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
||||
Stap += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu, Gimpl::CovShiftBackward(U[nu], nu, U[mu])))),
|
||||
mu);
|
||||
|
||||
// --
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
//tmp1 = U_nu^dag(x-nu)
|
||||
//tmp2 = U_nu^dag(x-nu)tmp1(x-nu) = U_nu^dag(x-nu)U_nu^dag(x-2nu)
|
||||
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu)U_nu^dag(x-mu-nu)U_nu^dag(x-mu-2nu)
|
||||
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_nu^dag(x-mu-nu)
|
||||
//tmp5 = U_nu(x)tmp4(x+nu) = U_nu(x)U_nu(x+nu)U_mu^dag(x-mu+2nu)U_nu^dag(x-mu+nu)U_nu^dag(x-mu)
|
||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
||||
Stap += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftForward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu], nu))))),
|
||||
mu);
|
||||
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
// --
|
||||
//tmp1 = U_nu(x)U_nu(x+nu)
|
||||
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu)U_nu(x-mu)U_nu(x-mu+nu)
|
||||
//tmp3 = U_nu^dag(x-nu)tmp2(x-nu) = U_nu^dag(x-nu)U_mu^dag(x-mu-nu)U_nu(x-mu-nu)U_nu(x-mu)
|
||||
//tmp4 = U_nu^dag(x-nu)tmp3(x-nu) = U_nu^dag(x-nu)U_nu^dag(x-2nu)U_mu^dag(x-mu-2nu)U_nu(x-mu-2nu)U_nu(x-mu-nu)
|
||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
||||
Stap += Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[nu], nu,
|
||||
Gimpl::CovShiftBackward(
|
||||
U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[nu])))),
|
||||
mu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void RectStaplePadded(GaugeMat &Stap, const GaugeLorentz &U,
|
||||
int mu) {
|
||||
PaddedCell Ghost(2,(GridCartesian*)U.Grid());
|
||||
GridBase *ggrid = Ghost.grids.back();
|
||||
|
||||
CshiftImplGauge<Gimpl> cshift_impl;
|
||||
std::vector<GaugeMat> Ug_dirs(Nd,ggrid);
|
||||
for(int i=0;i<Nd;i++) Ug_dirs[i] = Ghost.Exchange(PeekIndex<LorentzIndex>(U, i), cshift_impl);
|
||||
|
||||
GaugeMat gStaple(ggrid);
|
||||
|
||||
std::vector<Coordinate> shifts;
|
||||
for (int nu = 0; nu < Nd; nu++) {
|
||||
if (nu != mu) {
|
||||
auto genShift = [&](int mushift,int nushift){
|
||||
Coordinate out(Nd,0); out[mu]=mushift; out[nu]=nushift; return out;
|
||||
};
|
||||
|
||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
||||
shifts.push_back(genShift(0,0));
|
||||
shifts.push_back(genShift(0,+1));
|
||||
shifts.push_back(genShift(+1,+1));
|
||||
shifts.push_back(genShift(+2,0));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(+1,-1));
|
||||
shifts.push_back(genShift(+2,-1));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
||||
shifts.push_back(genShift(-1,0));
|
||||
shifts.push_back(genShift(-1,-1));
|
||||
shifts.push_back(genShift(-1,-1));
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(+1,-1));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
||||
shifts.push_back(genShift(-1,0));
|
||||
shifts.push_back(genShift(-1,0));
|
||||
shifts.push_back(genShift(-1,+1));
|
||||
shifts.push_back(genShift(0,+1));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
||||
shifts.push_back(genShift(0,0));
|
||||
shifts.push_back(genShift(0,+1));
|
||||
shifts.push_back(genShift(0,+2));
|
||||
shifts.push_back(genShift(+1,+1));
|
||||
shifts.push_back(genShift(+1,0));
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
||||
shifts.push_back(genShift(0,-1));
|
||||
shifts.push_back(genShift(0,-2));
|
||||
shifts.push_back(genShift(0,-2));
|
||||
shifts.push_back(genShift(+1,-2));
|
||||
shifts.push_back(genShift(+1,-1));
|
||||
}
|
||||
}
|
||||
size_t nshift = shifts.size();
|
||||
|
||||
GeneralLocalStencil gStencil(ggrid,shifts);
|
||||
{
|
||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
||||
auto gStencil_v = gStencil.View();
|
||||
|
||||
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
|
||||
size_t vsize = Nd*sizeof(GaugeViewType);
|
||||
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
|
||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = Ug_dirs[i].View(AcceleratorRead);
|
||||
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
|
||||
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
|
||||
|
||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
||||
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
|
||||
stencil_ss = Zero();
|
||||
int s=0;
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if(nu != mu){
|
||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
auto U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
e = gStencil_v.GetEntry(s++,ss);
|
||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
||||
|
||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
||||
|
||||
}
|
||||
}
|
||||
assert(s==nshift);
|
||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
||||
}
|
||||
);
|
||||
|
||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
|
||||
free(Ug_dirs_v_host);
|
||||
acceleratorFreeDevice(Ug_dirs_v);
|
||||
}
|
||||
Stap = Ghost.Extract(gStaple);
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
Coordinate latt_size = GridDefaultLatt();
|
||||
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
std::cout << " mpi "<<mpi_layout<<std::endl;
|
||||
std::cout << " simd "<<simd_layout<<std::endl;
|
||||
std::cout << " latt "<<latt_size<<std::endl;
|
||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&GRID);
|
||||
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
LatticeGaugeField U(&GRID);
|
||||
|
||||
SU<Nc>::HotConfiguration(pRNG,U);
|
||||
|
||||
//typedef PeriodicGimplD Gimpl;
|
||||
typedef ConjugateGimplD Gimpl;
|
||||
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
|
||||
Gimpl::setDirections(conj_dirs);
|
||||
|
||||
typedef typename WilsonLoopsTest<Gimpl>::GaugeMat GaugeMat;
|
||||
typedef typename WilsonLoopsTest<Gimpl>::GaugeLorentz GaugeLorentz;
|
||||
|
||||
std::cout << GridLogMessage << "Checking Staple" << std::endl;
|
||||
int count = 0;
|
||||
double torig=0, tpadded=0;
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if(mu != nu){
|
||||
GaugeMat staple_orig(&GRID), staple_padded(&GRID);
|
||||
double t0 = usecond();
|
||||
WilsonLoopsTest<Gimpl>::StapleOrig(staple_orig,U,mu,nu);
|
||||
double t1 = usecond();
|
||||
WilsonLoopsTest<Gimpl>::StaplePadded(staple_padded,U,mu,nu);
|
||||
double t2 = usecond();
|
||||
torig += t1-t0; tpadded += t2-t1;
|
||||
++count;
|
||||
|
||||
GaugeMat diff = staple_orig - staple_padded;
|
||||
double n = norm2(diff);
|
||||
std::cout << GridLogMessage << mu << " " << nu << " " << n << std::endl;
|
||||
assert(n<1e-10);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << GridLogMessage << "Staple timings orig: " << torig/1000/count << "ms, padded: " << tpadded/1000/count << "ms" << std::endl;
|
||||
count=0; torig=tpadded=0;
|
||||
|
||||
std::cout << GridLogMessage << "Checking RectStaple" << std::endl;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
GaugeMat staple_orig(&GRID), staple_padded(&GRID);
|
||||
double t0 = usecond();
|
||||
WilsonLoopsTest<Gimpl>::RectStapleOrig(staple_orig,U,mu);
|
||||
double t1 = usecond();
|
||||
WilsonLoopsTest<Gimpl>::RectStaplePadded(staple_padded,U,mu);
|
||||
double t2 = usecond();
|
||||
torig += t1-t0; tpadded += t2-t1;
|
||||
++count;
|
||||
|
||||
GaugeMat diff = staple_orig - staple_padded;
|
||||
double n = norm2(diff);
|
||||
std::cout << GridLogMessage << mu << " " << n << std::endl;
|
||||
assert(n<1e-10);
|
||||
}
|
||||
std::cout << GridLogMessage << "RectStaple timings orig: " << torig/1000/count << "ms, padded: " << tpadded/1000/count << "ms" << std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
Loading…
Reference in New Issue
Block a user