mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-22 09:42:02 +01:00
Compare commits
9 Commits
018e6da872
...
feature/ft
Author | SHA1 | Date | |
---|---|---|---|
09146cfc43 | |||
a450e96827 | |||
0f3678b9be | |||
8dd8338e14 | |||
11e0dc9851 | |||
f4ef6dae43 | |||
b6e147372b | |||
3a4a662dc6 | |||
8d06bda6fb |
@ -419,15 +419,14 @@ until convergence
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( Nconv < Nstop ) {
|
if ( Nconv < Nstop )
|
||||||
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
|
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
|
||||||
std::cout << GridLogIRL << "returning Nstop vectors, the last "<< Nstop-Nconv << "of which might meet convergence criterion only approximately" <<std::endl;
|
|
||||||
}
|
|
||||||
eval=eval2;
|
eval=eval2;
|
||||||
|
|
||||||
//Keep only converged
|
//Keep only converged
|
||||||
eval.resize(Nstop);// was Nconv
|
eval.resize(Nconv);// Nstop?
|
||||||
evec.resize(Nstop,grid);// was Nconv
|
evec.resize(Nconv,grid);// Nstop?
|
||||||
basisSortInPlace(evec,eval,reverse);
|
basisSortInPlace(evec,eval,reverse);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -604,8 +604,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
||||||
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
|
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
|
||||||
|
|
||||||
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_device());
|
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
|
||||||
auto zeContext = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_context());
|
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
|
||||||
|
|
||||||
ze_ipc_mem_handle_t ihandle;
|
ze_ipc_mem_handle_t ihandle;
|
||||||
clone_mem_t handle;
|
clone_mem_t handle;
|
||||||
|
@ -47,4 +47,3 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid/lattice/Lattice_transfer.h>
|
#include <Grid/lattice/Lattice_transfer.h>
|
||||||
#include <Grid/lattice/Lattice_basis.h>
|
#include <Grid/lattice/Lattice_basis.h>
|
||||||
#include <Grid/lattice/Lattice_crc.h>
|
#include <Grid/lattice/Lattice_crc.h>
|
||||||
#include <Grid/lattice/PaddedCell.h>
|
|
||||||
|
@ -697,68 +697,8 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
|||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
assert(Fg->_processors[d] == Tg->_processors[d]);
|
assert(Fg->_processors[d] == Tg->_processors[d]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
|
|
||||||
#if 1
|
|
||||||
|
|
||||||
size_t nsite = 1;
|
|
||||||
for(int i=0;i<nd;i++) nsite *= RegionSize[i];
|
|
||||||
|
|
||||||
size_t tbytes = 4*nsite*sizeof(int);
|
|
||||||
int *table = (int*)malloc(tbytes);
|
|
||||||
|
|
||||||
thread_for(idx, nsite, {
|
|
||||||
Coordinate from_coor, to_coor;
|
|
||||||
size_t rem = idx;
|
|
||||||
for(int i=0;i<nd;i++){
|
|
||||||
size_t base_i = rem % RegionSize[i]; rem /= RegionSize[i];
|
|
||||||
from_coor[i] = base_i + FromLowerLeft[i];
|
|
||||||
to_coor[i] = base_i + ToLowerLeft[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
int foidx = Fg->oIndex(from_coor);
|
|
||||||
int fiidx = Fg->iIndex(from_coor);
|
|
||||||
int toidx = Tg->oIndex(to_coor);
|
|
||||||
int tiidx = Tg->iIndex(to_coor);
|
|
||||||
int* tt = table + 4*idx;
|
|
||||||
tt[0] = foidx;
|
|
||||||
tt[1] = fiidx;
|
|
||||||
tt[2] = toidx;
|
|
||||||
tt[3] = tiidx;
|
|
||||||
});
|
|
||||||
|
|
||||||
int* table_d = (int*)acceleratorAllocDevice(tbytes);
|
|
||||||
acceleratorCopyToDevice(table,table_d,tbytes);
|
|
||||||
|
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
|
|
||||||
autoView(from_v,From,AcceleratorRead);
|
|
||||||
autoView(to_v,To,AcceleratorWrite);
|
|
||||||
|
|
||||||
accelerator_for(idx,nsite,1,{
|
|
||||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
|
||||||
int* tt = table_d + 4*idx;
|
|
||||||
int from_oidx = *tt++;
|
|
||||||
int from_lane = *tt++;
|
|
||||||
int to_oidx = *tt++;
|
|
||||||
int to_lane = *tt;
|
|
||||||
|
|
||||||
const vector_type* from = (const vector_type *)&from_v[from_oidx];
|
|
||||||
vector_type* to = (vector_type *)&to_v[to_oidx];
|
|
||||||
|
|
||||||
scalar_type stmp;
|
|
||||||
for(int w=0;w<words;w++){
|
|
||||||
stmp = getlane(from[w], from_lane);
|
|
||||||
putlane(to[w], stmp, to_lane);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
acceleratorFreeDevice(table_d);
|
|
||||||
free(table);
|
|
||||||
|
|
||||||
|
|
||||||
#else
|
|
||||||
Coordinate ldf = Fg->_ldimensions;
|
Coordinate ldf = Fg->_ldimensions;
|
||||||
Coordinate rdf = Fg->_rdimensions;
|
Coordinate rdf = Fg->_rdimensions;
|
||||||
Coordinate isf = Fg->_istride;
|
Coordinate isf = Fg->_istride;
|
||||||
@ -798,8 +738,6 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -892,8 +830,6 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//Insert subvolume orthogonal to direction 'orthog' with slice index 'slice_lo' from 'lowDim' onto slice index 'slice_hi' of higherDim
|
|
||||||
//The local dimensions of both 'lowDim' and 'higherDim' orthogonal to 'orthog' should be the same
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
{
|
{
|
||||||
@ -915,65 +851,6 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 1
|
|
||||||
size_t nsite = lg->lSites()/lg->LocalDimensions()[orthog];
|
|
||||||
size_t tbytes = 4*nsite*sizeof(int);
|
|
||||||
int *table = (int*)malloc(tbytes);
|
|
||||||
|
|
||||||
thread_for(idx,nsite,{
|
|
||||||
Coordinate lcoor(nl);
|
|
||||||
Coordinate hcoor(nh);
|
|
||||||
lcoor[orthog] = slice_lo;
|
|
||||||
hcoor[orthog] = slice_hi;
|
|
||||||
size_t rem = idx;
|
|
||||||
for(int mu=0;mu<nl;mu++){
|
|
||||||
if(mu != orthog){
|
|
||||||
int xmu = rem % lg->LocalDimensions()[mu]; rem /= lg->LocalDimensions()[mu];
|
|
||||||
lcoor[mu] = hcoor[mu] = xmu;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int loidx = lg->oIndex(lcoor);
|
|
||||||
int liidx = lg->iIndex(lcoor);
|
|
||||||
int hoidx = hg->oIndex(hcoor);
|
|
||||||
int hiidx = hg->iIndex(hcoor);
|
|
||||||
int* tt = table + 4*idx;
|
|
||||||
tt[0] = loidx;
|
|
||||||
tt[1] = liidx;
|
|
||||||
tt[2] = hoidx;
|
|
||||||
tt[3] = hiidx;
|
|
||||||
});
|
|
||||||
|
|
||||||
int* table_d = (int*)acceleratorAllocDevice(tbytes);
|
|
||||||
acceleratorCopyToDevice(table,table_d,tbytes);
|
|
||||||
|
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
|
|
||||||
autoView(lowDim_v,lowDim,AcceleratorRead);
|
|
||||||
autoView(higherDim_v,higherDim,AcceleratorWrite);
|
|
||||||
|
|
||||||
accelerator_for(idx,nsite,1,{
|
|
||||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
|
||||||
int* tt = table_d + 4*idx;
|
|
||||||
int from_oidx = *tt++;
|
|
||||||
int from_lane = *tt++;
|
|
||||||
int to_oidx = *tt++;
|
|
||||||
int to_lane = *tt;
|
|
||||||
|
|
||||||
const vector_type* from = (const vector_type *)&lowDim_v[from_oidx];
|
|
||||||
vector_type* to = (vector_type *)&higherDim_v[to_oidx];
|
|
||||||
|
|
||||||
scalar_type stmp;
|
|
||||||
for(int w=0;w<words;w++){
|
|
||||||
stmp = getlane(from[w], from_lane);
|
|
||||||
putlane(to[w], stmp, to_lane);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
acceleratorFreeDevice(table_d);
|
|
||||||
free(table);
|
|
||||||
|
|
||||||
#else
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
autoView(lowDimv,lowDim,CpuRead);
|
autoView(lowDimv,lowDim,CpuRead);
|
||||||
autoView(higherDimv,higherDim,CpuWrite);
|
autoView(higherDimv,higherDim,CpuWrite);
|
||||||
@ -989,7 +866,6 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
|||||||
pokeLocalSite(s,higherDimv,hcoor);
|
pokeLocalSite(s,higherDimv,hcoor);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,32 +26,14 @@ Author: Peter Boyle pboyle@bnl.gov
|
|||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include<Grid/cshift/Cshift.h>
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
//Allow the user to specify how the C-shift is performed, e.g. to respect the appropriate boundary conditions
|
|
||||||
template<typename vobj>
|
|
||||||
struct CshiftImplBase{
|
|
||||||
virtual Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const = 0;
|
|
||||||
virtual ~CshiftImplBase(){}
|
|
||||||
};
|
|
||||||
template<typename vobj>
|
|
||||||
struct CshiftImplDefault: public CshiftImplBase<vobj>{
|
|
||||||
Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const override{ return Grid::Cshift(in,dir,shift); }
|
|
||||||
};
|
|
||||||
template<typename Gimpl>
|
|
||||||
struct CshiftImplGauge: public CshiftImplBase<typename Gimpl::GaugeLinkField::vector_object>{
|
|
||||||
typename Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override{ return Gimpl::CshiftLink(in,dir,shift); }
|
|
||||||
};
|
|
||||||
|
|
||||||
class PaddedCell {
|
class PaddedCell {
|
||||||
public:
|
public:
|
||||||
GridCartesian * unpadded_grid;
|
GridCartesian * unpadded_grid;
|
||||||
int dims;
|
int dims;
|
||||||
int depth;
|
int depth;
|
||||||
std::vector<GridCartesian *> grids;
|
std::vector<GridCartesian *> grids;
|
||||||
|
|
||||||
~PaddedCell()
|
~PaddedCell()
|
||||||
{
|
{
|
||||||
DeleteGrids();
|
DeleteGrids();
|
||||||
@ -95,7 +77,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
|
inline Lattice<vobj> Extract(Lattice<vobj> &in)
|
||||||
{
|
{
|
||||||
Lattice<vobj> out(unpadded_grid);
|
Lattice<vobj> out(unpadded_grid);
|
||||||
|
|
||||||
@ -106,19 +88,19 @@ public:
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline Lattice<vobj> Exchange(const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
inline Lattice<vobj> Exchange(Lattice<vobj> &in)
|
||||||
{
|
{
|
||||||
GridBase *old_grid = in.Grid();
|
GridBase *old_grid = in.Grid();
|
||||||
int dims = old_grid->Nd();
|
int dims = old_grid->Nd();
|
||||||
Lattice<vobj> tmp = in;
|
Lattice<vobj> tmp = in;
|
||||||
for(int d=0;d<dims;d++){
|
for(int d=0;d<dims;d++){
|
||||||
tmp = Expand(d,tmp,cshift); // rvalue && assignment
|
tmp = Expand(d,tmp); // rvalue && assignment
|
||||||
}
|
}
|
||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
// expand up one dim at a time
|
// expand up one dim at a time
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
inline Lattice<vobj> Expand(int dim,Lattice<vobj> &in)
|
||||||
{
|
{
|
||||||
GridBase *old_grid = in.Grid();
|
GridBase *old_grid = in.Grid();
|
||||||
GridCartesian *new_grid = grids[dim];//These are new grids
|
GridCartesian *new_grid = grids[dim];//These are new grids
|
||||||
@ -130,40 +112,20 @@ public:
|
|||||||
else conformable(old_grid,grids[dim-1]);
|
else conformable(old_grid,grids[dim-1]);
|
||||||
|
|
||||||
std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
|
std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
|
||||||
|
|
||||||
double tins=0, tshift=0;
|
|
||||||
|
|
||||||
// Middle bit
|
// Middle bit
|
||||||
double t = usecond();
|
|
||||||
for(int x=0;x<local[dim];x++){
|
for(int x=0;x<local[dim];x++){
|
||||||
InsertSliceLocal(in,padded,x,depth+x,dim);
|
InsertSliceLocal(in,padded,x,depth+x,dim);
|
||||||
}
|
}
|
||||||
tins += usecond() - t;
|
|
||||||
|
|
||||||
// High bit
|
// High bit
|
||||||
t = usecond();
|
shifted = Cshift(in,dim,depth);
|
||||||
shifted = cshift.Cshift(in,dim,depth);
|
|
||||||
tshift += usecond() - t;
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
for(int x=0;x<depth;x++){
|
for(int x=0;x<depth;x++){
|
||||||
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
|
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
|
||||||
}
|
}
|
||||||
tins += usecond() - t;
|
|
||||||
|
|
||||||
// Low bit
|
// Low bit
|
||||||
t = usecond();
|
shifted = Cshift(in,dim,-depth);
|
||||||
shifted = cshift.Cshift(in,dim,-depth);
|
|
||||||
tshift += usecond() - t;
|
|
||||||
|
|
||||||
t = usecond();
|
|
||||||
for(int x=0;x<depth;x++){
|
for(int x=0;x<depth;x++){
|
||||||
InsertSliceLocal(shifted,padded,x,x,dim);
|
InsertSliceLocal(shifted,padded,x,x,dim);
|
||||||
}
|
}
|
||||||
tins += usecond() - t;
|
|
||||||
|
|
||||||
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
|
|
||||||
|
|
||||||
return padded;
|
return padded;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -423,6 +423,7 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
|
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
|
||||||
|
|
||||||
#define KERNEL_CALL_EXT(A) \
|
#define KERNEL_CALL_EXT(A) \
|
||||||
|
const uint64_t NN = Nsite*Ls; \
|
||||||
const uint64_t sz = st.surface_list.size(); \
|
const uint64_t sz = st.surface_list.size(); \
|
||||||
auto ptr = &st.surface_list[0]; \
|
auto ptr = &st.surface_list[0]; \
|
||||||
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
|
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
|
||||||
|
@ -176,7 +176,7 @@ public:
|
|||||||
return PeriodicBC::CshiftLink(Link,mu,shift);
|
return PeriodicBC::CshiftLink(Link,mu,shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void setDirections(const std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
|
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
|
||||||
static inline std::vector<int> getDirections(void) { return _conjDirs; }
|
static inline std::vector<int> getDirections(void) { return _conjDirs; }
|
||||||
static inline bool isPeriodicGaugeField(void) { return false; }
|
static inline bool isPeriodicGaugeField(void) { return false; }
|
||||||
};
|
};
|
||||||
|
@ -43,7 +43,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
RealD c_plaq;
|
RealD c_plaq;
|
||||||
RealD c_rect;
|
RealD c_rect;
|
||||||
typename WilsonLoops<Gimpl>::StapleAndRectStapleAllWorkspace workspace;
|
|
||||||
public:
|
public:
|
||||||
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
|
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
|
||||||
|
|
||||||
@ -79,18 +79,27 @@ public:
|
|||||||
GridBase *grid = Umu.Grid();
|
GridBase *grid = Umu.Grid();
|
||||||
|
|
||||||
std::vector<GaugeLinkField> U (Nd,grid);
|
std::vector<GaugeLinkField> U (Nd,grid);
|
||||||
|
std::vector<GaugeLinkField> U2(Nd,grid);
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
|
||||||
}
|
}
|
||||||
std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid);
|
|
||||||
WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, U, workspace);
|
|
||||||
|
|
||||||
GaugeLinkField dSdU_mu(grid);
|
GaugeLinkField dSdU_mu(grid);
|
||||||
GaugeLinkField staple(grid);
|
GaugeLinkField staple(grid);
|
||||||
|
|
||||||
for (int mu=0; mu < Nd; mu++){
|
for (int mu=0; mu < Nd; mu++){
|
||||||
dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p;
|
|
||||||
dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r;
|
// Staple in direction mu
|
||||||
|
|
||||||
|
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
|
||||||
|
|
||||||
|
dSdU_mu = Ta(U[mu]*staple)*factor_p;
|
||||||
|
|
||||||
|
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
|
||||||
|
|
||||||
|
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
|
||||||
|
|
||||||
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
||||||
}
|
}
|
||||||
|
@ -37,14 +37,13 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Make these members of an Impl class for BC's.
|
// Make these members of an Impl class for BC's.
|
||||||
|
|
||||||
namespace PeriodicBC {
|
namespace PeriodicBC {
|
||||||
//Out(x) = Link(x)*field(x+mu)
|
|
||||||
template<class covariant,class gauge> Lattice<covariant> CovShiftForward(const Lattice<gauge> &Link,
|
template<class covariant,class gauge> Lattice<covariant> CovShiftForward(const Lattice<gauge> &Link,
|
||||||
int mu,
|
int mu,
|
||||||
const Lattice<covariant> &field)
|
const Lattice<covariant> &field)
|
||||||
{
|
{
|
||||||
return Link*Cshift(field,mu,1);// moves towards negative mu
|
return Link*Cshift(field,mu,1);// moves towards negative mu
|
||||||
}
|
}
|
||||||
//Out(x) = Link^dag(x-mu)*field(x-mu)
|
|
||||||
template<class covariant,class gauge> Lattice<covariant> CovShiftBackward(const Lattice<gauge> &Link,
|
template<class covariant,class gauge> Lattice<covariant> CovShiftBackward(const Lattice<gauge> &Link,
|
||||||
int mu,
|
int mu,
|
||||||
const Lattice<covariant> &field)
|
const Lattice<covariant> &field)
|
||||||
@ -53,19 +52,19 @@ namespace PeriodicBC {
|
|||||||
tmp = adj(Link)*field;
|
tmp = adj(Link)*field;
|
||||||
return Cshift(tmp,mu,-1);// moves towards positive mu
|
return Cshift(tmp,mu,-1);// moves towards positive mu
|
||||||
}
|
}
|
||||||
//Out(x) = Link^dag(x-mu)
|
|
||||||
template<class gauge> Lattice<gauge>
|
template<class gauge> Lattice<gauge>
|
||||||
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
|
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
|
||||||
{
|
{
|
||||||
return Cshift(adj(Link), mu, -1);
|
return Cshift(adj(Link), mu, -1);
|
||||||
}
|
}
|
||||||
//Out(x) = Link(x)
|
|
||||||
template<class gauge> Lattice<gauge>
|
template<class gauge> Lattice<gauge>
|
||||||
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
|
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
|
||||||
{
|
{
|
||||||
return Link;
|
return Link;
|
||||||
}
|
}
|
||||||
//Link(x) = Link(x+mu)
|
|
||||||
template<class gauge> Lattice<gauge>
|
template<class gauge> Lattice<gauge>
|
||||||
ShiftStaple(const Lattice<gauge> &Link, int mu)
|
ShiftStaple(const Lattice<gauge> &Link, int mu)
|
||||||
{
|
{
|
||||||
|
@ -40,20 +40,18 @@ Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iSc
|
|||||||
GridBase *grid=Umu.Grid();
|
GridBase *grid=Umu.Grid();
|
||||||
auto lvol = grid->lSites();
|
auto lvol = grid->lSites();
|
||||||
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
|
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
|
||||||
typedef typename Vec::scalar_type scalar;
|
|
||||||
autoView(Umu_v,Umu,CpuRead);
|
autoView(Umu_v,Umu,CpuRead);
|
||||||
autoView(ret_v,ret,CpuWrite);
|
autoView(ret_v,ret,CpuWrite);
|
||||||
thread_for(site,lvol,{
|
thread_for(site,lvol,{
|
||||||
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
|
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
|
||||||
Coordinate lcoor;
|
Coordinate lcoor;
|
||||||
grid->LocalIndexToLocalCoor(site, lcoor);
|
grid->LocalIndexToLocalCoor(site, lcoor);
|
||||||
iScalar<iScalar<iMatrix<scalar, N> > > Us;
|
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
|
||||||
peekLocalSite(Us, Umu_v, lcoor);
|
peekLocalSite(Us, Umu_v, lcoor);
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
for(int j=0;j<N;j++){
|
for(int j=0;j<N;j++){
|
||||||
scalar tmp= Us()()(i,j);
|
EigenU(i,j) = Us()()(i,j);
|
||||||
ComplexD ztmp(real(tmp),imag(tmp));
|
|
||||||
EigenU(i,j)=ztmp;
|
|
||||||
}}
|
}}
|
||||||
ComplexD detD = EigenU.determinant();
|
ComplexD detD = EigenU.determinant();
|
||||||
typename Vec::scalar_type det(detD.real(),detD.imag());
|
typename Vec::scalar_type det(detD.real(),detD.imag());
|
||||||
|
@ -290,7 +290,7 @@ public:
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// the sum over all nu-oriented staples for nu != mu on each site
|
// the sum over all staples on each site
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
|
static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
|
||||||
|
|
||||||
@ -300,10 +300,6 @@ public:
|
|||||||
for (int d = 0; d < Nd; d++) {
|
for (int d = 0; d < Nd; d++) {
|
||||||
U[d] = PeekIndex<LorentzIndex>(Umu, d);
|
U[d] = PeekIndex<LorentzIndex>(Umu, d);
|
||||||
}
|
}
|
||||||
Staple(staple, U, mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &U, int mu) {
|
|
||||||
staple = Zero();
|
staple = Zero();
|
||||||
|
|
||||||
for (int nu = 0; nu < Nd; nu++) {
|
for (int nu = 0; nu < Nd; nu++) {
|
||||||
@ -339,202 +335,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////
|
|
||||||
//Staples for each direction mu, summed over nu != mu
|
|
||||||
//staple: output staples for each mu (Nd)
|
|
||||||
//U: link array (Nd)
|
|
||||||
/////////////
|
|
||||||
static void StapleAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U) {
|
|
||||||
assert(staple.size() == Nd); assert(U.size() == Nd);
|
|
||||||
for(int mu=0;mu<Nd;mu++) Staple(staple[mu], U, mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//A workspace class allowing reuse of the stencil
|
|
||||||
class WilsonLoopPaddedStencilWorkspace{
|
|
||||||
std::unique_ptr<GeneralLocalStencil> stencil;
|
|
||||||
size_t nshift;
|
|
||||||
|
|
||||||
void generateStencil(GridBase* padded_grid){
|
|
||||||
double t0 = usecond();
|
|
||||||
|
|
||||||
//Generate shift arrays
|
|
||||||
std::vector<Coordinate> shifts = this->getShifts();
|
|
||||||
nshift = shifts.size();
|
|
||||||
|
|
||||||
double t1 = usecond();
|
|
||||||
//Generate local stencil
|
|
||||||
stencil.reset(new GeneralLocalStencil(padded_grid,shifts));
|
|
||||||
double t2 = usecond();
|
|
||||||
std::cout << GridLogPerformance << " WilsonLoopPaddedWorkspace timings: coord:" << (t1-t0)/1000 << "ms, stencil:" << (t2-t1)/1000 << "ms" << std::endl;
|
|
||||||
}
|
|
||||||
public:
|
|
||||||
//Get the stencil. If not already generated, or if generated using a different Grid than in PaddedCell, it will be created on-the-fly
|
|
||||||
const GeneralLocalStencil & getStencil(const PaddedCell &pcell){
|
|
||||||
assert(pcell.depth >= this->paddingDepth());
|
|
||||||
if(!stencil || stencil->Grid() != (GridBase*)pcell.grids.back() ) generateStencil((GridBase*)pcell.grids.back());
|
|
||||||
return *stencil;
|
|
||||||
}
|
|
||||||
size_t Nshift() const{ return nshift; }
|
|
||||||
|
|
||||||
virtual std::vector<Coordinate> getShifts() const = 0;
|
|
||||||
virtual int paddingDepth() const = 0; //padding depth required
|
|
||||||
|
|
||||||
virtual ~WilsonLoopPaddedStencilWorkspace(){}
|
|
||||||
};
|
|
||||||
|
|
||||||
//This workspace allows the sharing of a common PaddedCell object between multiple stencil workspaces
|
|
||||||
class WilsonLoopPaddedWorkspace{
|
|
||||||
std::vector<WilsonLoopPaddedStencilWorkspace*> stencil_wk;
|
|
||||||
std::unique_ptr<PaddedCell> pcell;
|
|
||||||
|
|
||||||
void generatePcell(GridBase* unpadded_grid){
|
|
||||||
assert(stencil_wk.size());
|
|
||||||
int max_depth = 0;
|
|
||||||
for(auto const &s : stencil_wk) max_depth=std::max(max_depth, s->paddingDepth());
|
|
||||||
|
|
||||||
pcell.reset(new PaddedCell(max_depth, dynamic_cast<GridCartesian*>(unpadded_grid)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
//Add a stencil definition. This should be done before the first call to retrieve a stencil object.
|
|
||||||
//Takes ownership of the pointer
|
|
||||||
void addStencil(WilsonLoopPaddedStencilWorkspace *stencil){
|
|
||||||
assert(!pcell);
|
|
||||||
stencil_wk.push_back(stencil);
|
|
||||||
}
|
|
||||||
|
|
||||||
const GeneralLocalStencil & getStencil(const size_t stencil_idx, GridBase* unpadded_grid){
|
|
||||||
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
|
|
||||||
return stencil_wk[stencil_idx]->getStencil(*pcell);
|
|
||||||
}
|
|
||||||
const PaddedCell & getPaddedCell(GridBase* unpadded_grid){
|
|
||||||
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
|
|
||||||
return *pcell;
|
|
||||||
}
|
|
||||||
|
|
||||||
~WilsonLoopPaddedWorkspace(){
|
|
||||||
for(auto &s : stencil_wk) delete s;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//A workspace class allowing reuse of the stencil
|
|
||||||
class StaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
|
|
||||||
public:
|
|
||||||
std::vector<Coordinate> getShifts() const override{
|
|
||||||
std::vector<Coordinate> shifts;
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
for(int nu=0;nu<Nd;nu++){
|
|
||||||
if(nu != mu){
|
|
||||||
Coordinate shift_0(Nd,0);
|
|
||||||
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
|
|
||||||
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
|
|
||||||
Coordinate shift_mnu(Nd,0); shift_mnu[nu]=-1;
|
|
||||||
Coordinate shift_mnu_pmu(Nd,0); shift_mnu_pmu[nu]=-1; shift_mnu_pmu[mu]=1;
|
|
||||||
|
|
||||||
//U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
|
|
||||||
shifts.push_back(shift_0);
|
|
||||||
shifts.push_back(shift_nu);
|
|
||||||
shifts.push_back(shift_mu);
|
|
||||||
|
|
||||||
//U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
|
|
||||||
shifts.push_back(shift_mnu);
|
|
||||||
shifts.push_back(shift_mnu);
|
|
||||||
shifts.push_back(shift_mnu_pmu);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return shifts;
|
|
||||||
}
|
|
||||||
|
|
||||||
int paddingDepth() const override{ return 1; }
|
|
||||||
};
|
|
||||||
|
|
||||||
//Padded cell implementation of the staple method for all mu, summed over nu != mu
|
|
||||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
|
||||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
|
||||||
//Cell: the padded cell class
|
|
||||||
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
|
|
||||||
StaplePaddedAllWorkspace wk;
|
|
||||||
StaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
|
|
||||||
}
|
|
||||||
|
|
||||||
//Padded cell implementation of the staple method for all mu, summed over nu != mu
|
|
||||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
|
||||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
|
||||||
//Cell: the padded cell class
|
|
||||||
//gStencil: the precomputed generalized local stencil for the staple
|
|
||||||
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) {
|
|
||||||
double t0 = usecond();
|
|
||||||
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
|
|
||||||
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
|
|
||||||
assert(Cell.depth >= 1);
|
|
||||||
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
|
|
||||||
|
|
||||||
int shift_mu_off = gStencil._npoints/Nd;
|
|
||||||
|
|
||||||
//Open views to padded gauge links and keep open over mu loop
|
|
||||||
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
|
|
||||||
size_t vsize = Nd*sizeof(GaugeViewType);
|
|
||||||
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
|
|
||||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
|
|
||||||
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
|
|
||||||
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
|
|
||||||
|
|
||||||
GaugeMat gStaple(ggrid);
|
|
||||||
|
|
||||||
int outer_off = 0;
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
{ //view scope
|
|
||||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
|
||||||
auto gStencil_v = gStencil.View();
|
|
||||||
|
|
||||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
|
||||||
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
|
|
||||||
stencil_ss = Zero();
|
|
||||||
int off = outer_off;
|
|
||||||
|
|
||||||
for(int nu=0;nu<Nd;nu++){
|
|
||||||
if(nu != mu){
|
|
||||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(off++,ss);
|
|
||||||
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(off++,ss);
|
|
||||||
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(off++,ss);
|
|
||||||
auto U2 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U2 * U1 * U0;
|
|
||||||
|
|
||||||
e = gStencil_v.GetEntry(off++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(off++,ss);
|
|
||||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(off++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U2 * U1 * U0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
} //ensure views are all closed!
|
|
||||||
|
|
||||||
staple[mu] = Cell.Extract(gStaple);
|
|
||||||
outer_off += shift_mu_off;
|
|
||||||
}//mu loop
|
|
||||||
|
|
||||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
|
|
||||||
free(Ug_dirs_v_host);
|
|
||||||
acceleratorFreeDevice(Ug_dirs_v);
|
|
||||||
|
|
||||||
double t1=usecond();
|
|
||||||
|
|
||||||
std::cout << GridLogPerformance << "StaplePaddedAll timing:" << (t1-t0)/1000 << "ms" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// the sum over all staples on each site in direction mu,nu, upper part
|
// the sum over all staples on each site in direction mu,nu, upper part
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
@ -907,14 +707,18 @@ public:
|
|||||||
// the sum over all staples on each site
|
// the sum over all staples on each site
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
static void RectStapleDouble(GaugeMat &U2, const GaugeMat &U, int mu) {
|
static void RectStapleDouble(GaugeMat &U2, const GaugeMat &U, int mu) {
|
||||||
U2 = U * Gimpl::CshiftLink(U, mu, 1);
|
U2 = U * Cshift(U, mu, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// Hop by two optimisation strategy. Use RectStapleDouble to obtain 'U2'
|
// Hop by two optimisation strategy does not work nicely with Gparity. (could
|
||||||
|
// do,
|
||||||
|
// but need to track two deep where cross boundary and apply a conjugation).
|
||||||
|
// Must differentiate this in Gimpl, and use Gimpl::isPeriodicGaugeField to do
|
||||||
|
// so .
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
static void RectStapleOptimised(GaugeMat &Stap, const std::vector<GaugeMat> &U2,
|
static void RectStapleOptimised(GaugeMat &Stap, std::vector<GaugeMat> &U2,
|
||||||
const std::vector<GaugeMat> &U, int mu) {
|
std::vector<GaugeMat> &U, int mu) {
|
||||||
|
|
||||||
Stap = Zero();
|
Stap = Zero();
|
||||||
|
|
||||||
@ -928,9 +732,9 @@ public:
|
|||||||
|
|
||||||
// Up staple ___ ___
|
// Up staple ___ ___
|
||||||
// | |
|
// | |
|
||||||
tmp = Gimpl::CshiftLink(adj(U[nu]), nu, -1);
|
tmp = Cshift(adj(U[nu]), nu, -1);
|
||||||
tmp = adj(U2[mu]) * tmp;
|
tmp = adj(U2[mu]) * tmp;
|
||||||
tmp = Gimpl::CshiftLink(tmp, mu, -2);
|
tmp = Cshift(tmp, mu, -2);
|
||||||
|
|
||||||
Staple2x1 = Gimpl::CovShiftForward(U[nu], nu, tmp);
|
Staple2x1 = Gimpl::CovShiftForward(U[nu], nu, tmp);
|
||||||
|
|
||||||
@ -938,14 +742,14 @@ public:
|
|||||||
// |___ ___|
|
// |___ ___|
|
||||||
//
|
//
|
||||||
tmp = adj(U2[mu]) * U[nu];
|
tmp = adj(U2[mu]) * U[nu];
|
||||||
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CshiftLink(tmp, mu, -2));
|
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Cshift(tmp, mu, -2));
|
||||||
|
|
||||||
// ___ ___
|
// ___ ___
|
||||||
// | ___|
|
// | ___|
|
||||||
// |___ ___|
|
// |___ ___|
|
||||||
//
|
//
|
||||||
|
|
||||||
Stap += Gimpl::CshiftLink(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
|
Stap += Cshift(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
|
||||||
|
|
||||||
// ___ ___
|
// ___ ___
|
||||||
// |___ |
|
// |___ |
|
||||||
@ -954,7 +758,7 @@ public:
|
|||||||
|
|
||||||
// tmp= Staple2x1* Cshift(U[mu],mu,-2);
|
// tmp= Staple2x1* Cshift(U[mu],mu,-2);
|
||||||
// Stap+= Cshift(tmp,mu,1) ;
|
// Stap+= Cshift(tmp,mu,1) ;
|
||||||
Stap += Gimpl::CshiftLink(Staple2x1, mu, 1) * Gimpl::CshiftLink(U[mu], mu, -1);
|
Stap += Cshift(Staple2x1, mu, 1) * Cshift(U[mu], mu, -1);
|
||||||
;
|
;
|
||||||
|
|
||||||
// --
|
// --
|
||||||
@ -962,10 +766,10 @@ public:
|
|||||||
//
|
//
|
||||||
// | |
|
// | |
|
||||||
|
|
||||||
tmp = Gimpl::CshiftLink(adj(U2[nu]), nu, -2);
|
tmp = Cshift(adj(U2[nu]), nu, -2);
|
||||||
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
|
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
|
||||||
tmp = U2[nu] * Gimpl::CshiftLink(tmp, nu, 2);
|
tmp = U2[nu] * Cshift(tmp, nu, 2);
|
||||||
Stap += Gimpl::CshiftLink(tmp, mu, 1);
|
Stap += Cshift(tmp, mu, 1);
|
||||||
|
|
||||||
// | |
|
// | |
|
||||||
//
|
//
|
||||||
@ -974,12 +778,25 @@ public:
|
|||||||
|
|
||||||
tmp = Gimpl::CovShiftBackward(U[mu], mu, U2[nu]);
|
tmp = Gimpl::CovShiftBackward(U[mu], mu, U2[nu]);
|
||||||
tmp = adj(U2[nu]) * tmp;
|
tmp = adj(U2[nu]) * tmp;
|
||||||
tmp = Gimpl::CshiftLink(tmp, nu, -2);
|
tmp = Cshift(tmp, nu, -2);
|
||||||
Stap += Gimpl::CshiftLink(tmp, mu, 1);
|
Stap += Cshift(tmp, mu, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
|
||||||
|
RectStapleUnoptimised(Stap, Umu, mu);
|
||||||
|
}
|
||||||
|
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
|
||||||
|
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
|
||||||
|
int mu) {
|
||||||
|
if (Gimpl::isPeriodicGaugeField()) {
|
||||||
|
RectStapleOptimised(Stap, U2, U, mu);
|
||||||
|
} else {
|
||||||
|
RectStapleUnoptimised(Stap, Umu, mu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void RectStapleUnoptimised(GaugeMat &Stap, const GaugeLorentz &Umu,
|
static void RectStapleUnoptimised(GaugeMat &Stap, const GaugeLorentz &Umu,
|
||||||
int mu) {
|
int mu) {
|
||||||
GridBase *grid = Umu.Grid();
|
GridBase *grid = Umu.Grid();
|
||||||
@ -1078,288 +895,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
|
|
||||||
RectStapleUnoptimised(Stap, Umu, mu);
|
|
||||||
}
|
|
||||||
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
|
|
||||||
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
|
|
||||||
int mu) {
|
|
||||||
RectStapleOptimised(Stap, U2, U, mu);
|
|
||||||
}
|
|
||||||
//////////////////////////////////////////////////////
|
|
||||||
//Compute the rectangular staples for all orientations
|
|
||||||
//Stap : Array of staples (Nd)
|
|
||||||
//U: Gauge links in each direction (Nd)
|
|
||||||
/////////////////////////////////////////////////////
|
|
||||||
static void RectStapleAll(std::vector<GaugeMat> &Stap, const std::vector<GaugeMat> &U){
|
|
||||||
assert(Stap.size() == Nd); assert(U.size() == Nd);
|
|
||||||
std::vector<GaugeMat> U2(Nd,U[0].Grid());
|
|
||||||
for(int mu=0;mu<Nd;mu++) RectStapleDouble(U2[mu], U[mu], mu);
|
|
||||||
for(int mu=0;mu<Nd;mu++) RectStapleOptimised(Stap[mu], U2, U, mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
//A workspace class allowing reuse of the stencil
|
|
||||||
class RectStaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
|
|
||||||
public:
|
|
||||||
std::vector<Coordinate> getShifts() const override{
|
|
||||||
std::vector<Coordinate> shifts;
|
|
||||||
for (int mu = 0; mu < Nd; mu++){
|
|
||||||
for (int nu = 0; nu < Nd; nu++) {
|
|
||||||
if (nu != mu) {
|
|
||||||
auto genShift = [&](int mushift,int nushift){
|
|
||||||
Coordinate out(Nd,0); out[mu]=mushift; out[nu]=nushift; return out;
|
|
||||||
};
|
|
||||||
|
|
||||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
|
||||||
shifts.push_back(genShift(0,0));
|
|
||||||
shifts.push_back(genShift(0,+1));
|
|
||||||
shifts.push_back(genShift(+1,+1));
|
|
||||||
shifts.push_back(genShift(+2,0));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(+1,-1));
|
|
||||||
shifts.push_back(genShift(+2,-1));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
|
||||||
shifts.push_back(genShift(-1,0));
|
|
||||||
shifts.push_back(genShift(-1,-1));
|
|
||||||
shifts.push_back(genShift(-1,-1));
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(+1,-1));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
|
||||||
shifts.push_back(genShift(-1,0));
|
|
||||||
shifts.push_back(genShift(-1,0));
|
|
||||||
shifts.push_back(genShift(-1,+1));
|
|
||||||
shifts.push_back(genShift(0,+1));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
|
||||||
shifts.push_back(genShift(0,0));
|
|
||||||
shifts.push_back(genShift(0,+1));
|
|
||||||
shifts.push_back(genShift(0,+2));
|
|
||||||
shifts.push_back(genShift(+1,+1));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(0,-2));
|
|
||||||
shifts.push_back(genShift(0,-2));
|
|
||||||
shifts.push_back(genShift(+1,-2));
|
|
||||||
shifts.push_back(genShift(+1,-1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return shifts;
|
|
||||||
}
|
|
||||||
|
|
||||||
int paddingDepth() const override{ return 2; }
|
|
||||||
};
|
|
||||||
|
|
||||||
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
|
|
||||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
|
||||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
|
||||||
//Cell: the padded cell class
|
|
||||||
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
|
|
||||||
RectStaplePaddedAllWorkspace wk;
|
|
||||||
RectStaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
|
|
||||||
}
|
|
||||||
|
|
||||||
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
|
|
||||||
//staple: output staple for each mu, summed over nu != mu (Nd)
|
|
||||||
//U_padded: the gauge link fields padded out using the PaddedCell class
|
|
||||||
//Cell: the padded cell class
|
|
||||||
//gStencil: the stencil
|
|
||||||
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) {
|
|
||||||
double t0 = usecond();
|
|
||||||
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
|
|
||||||
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
|
|
||||||
assert(Cell.depth >= 2);
|
|
||||||
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
|
|
||||||
|
|
||||||
size_t nshift = gStencil._npoints;
|
|
||||||
int mu_off_delta = nshift / Nd;
|
|
||||||
|
|
||||||
//Open views to padded gauge links and keep open over mu loop
|
|
||||||
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
|
|
||||||
size_t vsize = Nd*sizeof(GaugeViewType);
|
|
||||||
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
|
|
||||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
|
|
||||||
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
|
|
||||||
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
|
|
||||||
|
|
||||||
GaugeMat gStaple(ggrid); //temp staple object on padded grid
|
|
||||||
|
|
||||||
int offset = 0;
|
|
||||||
for(int mu=0; mu<Nd; mu++){
|
|
||||||
|
|
||||||
{ //view scope
|
|
||||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
|
||||||
auto gStencil_v = gStencil.View();
|
|
||||||
|
|
||||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
|
||||||
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
|
|
||||||
stencil_ss = Zero();
|
|
||||||
int s=offset;
|
|
||||||
for(int nu=0;nu<Nd;nu++){
|
|
||||||
if(nu != mu){
|
|
||||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
|
||||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
offset += mu_off_delta;
|
|
||||||
}//kernel/view scope
|
|
||||||
|
|
||||||
staple[mu] = Cell.Extract(gStaple);
|
|
||||||
}//mu loop
|
|
||||||
|
|
||||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
|
|
||||||
free(Ug_dirs_v_host);
|
|
||||||
acceleratorFreeDevice(Ug_dirs_v);
|
|
||||||
|
|
||||||
double t1 = usecond();
|
|
||||||
|
|
||||||
std::cout << GridLogPerformance << "RectStaplePaddedAll timings:" << (t1-t0)/1000 << "ms" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
//A workspace for reusing the PaddedCell and GeneralLocalStencil objects
|
|
||||||
class StapleAndRectStapleAllWorkspace: public WilsonLoopPaddedWorkspace{
|
|
||||||
public:
|
|
||||||
StapleAndRectStapleAllWorkspace(){
|
|
||||||
this->addStencil(new StaplePaddedAllWorkspace);
|
|
||||||
this->addStencil(new RectStaplePaddedAllWorkspace);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
|
||||||
//Compute the 1x1 and 1x2 staples for all orientations
|
|
||||||
//Stap : Array of staples (Nd)
|
|
||||||
//RectStap: Array of rectangular staples (Nd)
|
|
||||||
//U: Gauge links in each direction (Nd)
|
|
||||||
/////////////////////////////////////////////////////
|
|
||||||
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U){
|
|
||||||
StapleAndRectStapleAllWorkspace wk;
|
|
||||||
StapleAndRectStapleAll(Stap,RectStap,U,wk);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
|
||||||
//Compute the 1x1 and 1x2 staples for all orientations
|
|
||||||
//Stap : Array of staples (Nd)
|
|
||||||
//RectStap: Array of rectangular staples (Nd)
|
|
||||||
//U: Gauge links in each direction (Nd)
|
|
||||||
//wk: a workspace containing stored PaddedCell and GeneralLocalStencil objects to maximize reuse
|
|
||||||
/////////////////////////////////////////////////////
|
|
||||||
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U, StapleAndRectStapleAllWorkspace &wk){
|
|
||||||
#if 0
|
|
||||||
StapleAll(Stap, U);
|
|
||||||
RectStapleAll(RectStap, U);
|
|
||||||
#else
|
|
||||||
double t0 = usecond();
|
|
||||||
|
|
||||||
GridCartesian* unpadded_grid = dynamic_cast<GridCartesian*>(U[0].Grid());
|
|
||||||
const PaddedCell &Ghost = wk.getPaddedCell(unpadded_grid);
|
|
||||||
|
|
||||||
CshiftImplGauge<Gimpl> cshift_impl;
|
|
||||||
std::vector<GaugeMat> U_pad(Nd, Ghost.grids.back());
|
|
||||||
for(int mu=0;mu<Nd;mu++) U_pad[mu] = Ghost.Exchange(U[mu], cshift_impl);
|
|
||||||
double t1 = usecond();
|
|
||||||
StaplePaddedAll(Stap, U_pad, Ghost, wk.getStencil(0,unpadded_grid) );
|
|
||||||
double t2 = usecond();
|
|
||||||
RectStaplePaddedAll(RectStap, U_pad, Ghost, wk.getStencil(1,unpadded_grid));
|
|
||||||
double t3 = usecond();
|
|
||||||
std::cout << GridLogPerformance << "StapleAndRectStapleAll timings: pad:" << (t1-t0)/1000 << "ms, staple:" << (t2-t1)/1000 << "ms, rect-staple:" << (t3-t2)/1000 << "ms" << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// Wilson loop of size (R1, R2), oriented in mu,nu plane
|
// Wilson loop of size (R1, R2), oriented in mu,nu plane
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
|
@ -79,10 +79,10 @@ public:
|
|||||||
this->_entries.resize(npoints* osites);
|
this->_entries.resize(npoints* osites);
|
||||||
this->_entries_p = &_entries[0];
|
this->_entries_p = &_entries[0];
|
||||||
|
|
||||||
thread_for(site, osites, {
|
|
||||||
Coordinate Coor;
|
Coordinate Coor;
|
||||||
Coordinate NbrCoor;
|
Coordinate NbrCoor;
|
||||||
|
for(Integer site=0;site<osites;site++){
|
||||||
for(Integer ii=0;ii<npoints;ii++){
|
for(Integer ii=0;ii<npoints;ii++){
|
||||||
Integer lex = site*npoints+ii;
|
Integer lex = site*npoints+ii;
|
||||||
GeneralStencilEntry SE;
|
GeneralStencilEntry SE;
|
||||||
@ -132,7 +132,7 @@ public:
|
|||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
this->_entries[lex] = SE;
|
this->_entries[lex] = SE;
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -32,7 +32,6 @@
|
|||||||
|
|
||||||
#include <Grid/stencil/SimpleCompressor.h> // subdir aggregate
|
#include <Grid/stencil/SimpleCompressor.h> // subdir aggregate
|
||||||
#include <Grid/stencil/Lebesgue.h> // subdir aggregate
|
#include <Grid/stencil/Lebesgue.h> // subdir aggregate
|
||||||
#include <Grid/stencil/GeneralLocalStencil.h>
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Must not lose sight that goal is to be able to construct really efficient
|
// Must not lose sight that goal is to be able to construct really efficient
|
||||||
@ -706,7 +705,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
||||||
}
|
}
|
||||||
/// Introduce a block structure and switch off comms on boundaries
|
/// Introduce a block structure and switch off comms on boundaries
|
||||||
void DirichletBlock(const Coordinate &dirichlet_block)
|
void DirichletBlock(const Coordinate &dirichlet_block)
|
||||||
|
@ -73,16 +73,6 @@ vobj coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int
|
|||||||
return vec;
|
return vec;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//'perm_mask' acts as a bitmask
|
|
||||||
template<class vobj> accelerator_inline
|
|
||||||
vobj coalescedReadGeneralPermute(const vobj & __restrict__ vec,int perm_mask,int nd,int lane=0)
|
|
||||||
{
|
|
||||||
auto obj = vec, tmp = vec;
|
|
||||||
for (int d=0;d<nd;d++)
|
|
||||||
if (perm_mask & (0x1 << d)) { permute(obj,tmp,d); tmp=obj;}
|
|
||||||
return obj;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class vobj> accelerator_inline
|
template<class vobj> accelerator_inline
|
||||||
void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
|
void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
|
||||||
{
|
{
|
||||||
@ -93,7 +83,7 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
|
|||||||
{
|
{
|
||||||
vstream(vec, extracted);
|
vstream(vec, extracted);
|
||||||
}
|
}
|
||||||
#else //==GRID_SIMT
|
#else
|
||||||
|
|
||||||
|
|
||||||
//#ifndef GRID_SYCL
|
//#ifndef GRID_SYCL
|
||||||
@ -176,14 +166,6 @@ typename vobj::scalar_object coalescedReadPermute(const vobj & __restrict__ vec,
|
|||||||
return extractLane(plane,vec);
|
return extractLane(plane,vec);
|
||||||
}
|
}
|
||||||
template<class vobj> accelerator_inline
|
template<class vobj> accelerator_inline
|
||||||
typename vobj::scalar_object coalescedReadGeneralPermute(const vobj & __restrict__ vec,int perm_mask,int nd,int lane=acceleratorSIMTlane(vobj::Nsimd()))
|
|
||||||
{
|
|
||||||
int plane = lane;
|
|
||||||
for (int d=0;d<nd;d++)
|
|
||||||
plane = (perm_mask & (0x1 << d)) ? plane ^ (vobj::Nsimd() >> (d + 1)) : plane;
|
|
||||||
return extractLane(plane,vec);
|
|
||||||
}
|
|
||||||
template<class vobj> accelerator_inline
|
|
||||||
void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=acceleratorSIMTlane(vobj::Nsimd()))
|
void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=acceleratorSIMTlane(vobj::Nsimd()))
|
||||||
{
|
{
|
||||||
insertLane(lane,vec,extracted);
|
insertLane(lane,vec,extracted);
|
||||||
|
@ -90,12 +90,10 @@ template<class vtype,int N> accelerator_inline iVector<vtype,N> ProjectOnGroup(c
|
|||||||
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
|
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
|
||||||
accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
||||||
{
|
{
|
||||||
typedef typename iMatrix<vtype,N>::scalar_type scalar;
|
|
||||||
// need a check for the group type?
|
// need a check for the group type?
|
||||||
iMatrix<vtype,N> ret(arg);
|
iMatrix<vtype,N> ret(arg);
|
||||||
vtype nrm;
|
vtype nrm;
|
||||||
vtype inner;
|
vtype inner;
|
||||||
scalar one(1.0);
|
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
|
|
||||||
// Normalises row c1
|
// Normalises row c1
|
||||||
@ -104,7 +102,7 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
|||||||
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
||||||
|
|
||||||
nrm = sqrt(inner);
|
nrm = sqrt(inner);
|
||||||
nrm = one/nrm;
|
nrm = 1.0/nrm;
|
||||||
for(int c2=0;c2<N;c2++)
|
for(int c2=0;c2<N;c2++)
|
||||||
ret._internal[c1][c2]*= nrm;
|
ret._internal[c1][c2]*= nrm;
|
||||||
|
|
||||||
@ -129,7 +127,7 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
|||||||
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
||||||
|
|
||||||
nrm = sqrt(inner);
|
nrm = sqrt(inner);
|
||||||
nrm = one/nrm;
|
nrm = 1.0/nrm;
|
||||||
for(int c2=0;c2<N;c2++)
|
for(int c2=0;c2<N;c2++)
|
||||||
ret._internal[c1][c2]*= nrm;
|
ret._internal[c1][c2]*= nrm;
|
||||||
}
|
}
|
||||||
|
@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
|
|||||||
|
|
||||||
|
|
||||||
// Specialisation: Cayley-Hamilton exponential for SU(3)
|
// Specialisation: Cayley-Hamilton exponential for SU(3)
|
||||||
#if 0
|
#ifndef GRID_ACCELERATED
|
||||||
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
|
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
|
||||||
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
|
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
|
||||||
{
|
{
|
||||||
|
224
HMC/FTHMC2p1f.cc
224
HMC/FTHMC2p1f.cc
@ -1,224 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Copyright (C) 2023
|
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution
|
|
||||||
directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/Grid.h>
|
|
||||||
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
|
|
||||||
#include <Grid/qcd/smearing/JacobianAction.h>
|
|
||||||
|
|
||||||
using namespace Grid;
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(12);
|
|
||||||
|
|
||||||
Grid_init(&argc, &argv);
|
|
||||||
int threads = GridThread::GetThreads();
|
|
||||||
// here make a routine to print all the relevant information on the run
|
|
||||||
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
|
|
||||||
|
|
||||||
// Typedefs to simplify notation
|
|
||||||
typedef WilsonImplR FermionImplPolicy;
|
|
||||||
typedef MobiusFermionD FermionAction;
|
|
||||||
typedef typename FermionAction::FermionField FermionField;
|
|
||||||
|
|
||||||
typedef Grid::XmlReader Serialiser;
|
|
||||||
|
|
||||||
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
IntegratorParameters MD;
|
|
||||||
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
|
||||||
// MD.name = std::string("Leap Frog");
|
|
||||||
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
|
||||||
// MD.name = std::string("Force Gradient");
|
|
||||||
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
|
||||||
MD.name = std::string("MinimumNorm2");
|
|
||||||
MD.MDsteps = 12;
|
|
||||||
MD.trajL = 1.0;
|
|
||||||
|
|
||||||
HMCparameters HMCparams;
|
|
||||||
HMCparams.StartTrajectory = 0;
|
|
||||||
HMCparams.Trajectories = 200;
|
|
||||||
HMCparams.NoMetropolisUntil= 20;
|
|
||||||
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
|
||||||
HMCparams.StartingType =std::string("HotStart");
|
|
||||||
HMCparams.MD = MD;
|
|
||||||
HMCWrapper TheHMC(HMCparams);
|
|
||||||
|
|
||||||
// Grid from the command line arguments --grid and --mpi
|
|
||||||
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
|
||||||
|
|
||||||
CheckpointerParameters CPparams;
|
|
||||||
CPparams.config_prefix = "ckpoint_EODWF_lat";
|
|
||||||
CPparams.smeared_prefix = "ckpoint_EODWF_lat_smr";
|
|
||||||
CPparams.rng_prefix = "ckpoint_EODWF_rng";
|
|
||||||
CPparams.saveInterval = 1;
|
|
||||||
CPparams.saveSmeared = true;
|
|
||||||
CPparams.format = "IEEE64BIG";
|
|
||||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
|
||||||
|
|
||||||
RNGModuleParameters RNGpar;
|
|
||||||
RNGpar.serial_seeds = "1 2 3 4 5";
|
|
||||||
RNGpar.parallel_seeds = "6 7 8 9 10";
|
|
||||||
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
|
||||||
|
|
||||||
// Construct observables
|
|
||||||
// here there is too much indirection
|
|
||||||
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
|
||||||
TheHMC.Resources.AddObservable<PlaqObs>();
|
|
||||||
//////////////////////////////////////////////
|
|
||||||
|
|
||||||
const int Ls = 16;
|
|
||||||
Real beta = 2.13;
|
|
||||||
Real light_mass = 0.01;
|
|
||||||
Real strange_mass = 0.04;
|
|
||||||
Real pv_mass = 1.0;
|
|
||||||
RealD M5 = 1.8;
|
|
||||||
RealD b = 1.0; // Scale factor two
|
|
||||||
RealD c = 0.0;
|
|
||||||
|
|
||||||
OneFlavourRationalParams OFRp;
|
|
||||||
OFRp.lo = 1.0e-2;
|
|
||||||
OFRp.hi = 64;
|
|
||||||
OFRp.MaxIter = 10000;
|
|
||||||
OFRp.tolerance= 1.0e-10;
|
|
||||||
OFRp.degree = 14;
|
|
||||||
OFRp.precision= 40;
|
|
||||||
|
|
||||||
std::vector<Real> hasenbusch({ 0.1 });
|
|
||||||
|
|
||||||
auto GridPtr = TheHMC.Resources.GetCartesian();
|
|
||||||
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
|
||||||
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
|
|
||||||
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
|
|
||||||
|
|
||||||
IwasakiGaugeActionR GaugeAction(beta);
|
|
||||||
|
|
||||||
// temporarily need a gauge field
|
|
||||||
LatticeGaugeField U(GridPtr);
|
|
||||||
LatticeGaugeField Uhot(GridPtr);
|
|
||||||
|
|
||||||
// These lines are unecessary if BC are all periodic
|
|
||||||
std::vector<Complex> boundary = {1,1,1,-1};
|
|
||||||
FermionAction::ImplParams Params(boundary);
|
|
||||||
|
|
||||||
double StoppingCondition = 1e-10;
|
|
||||||
double MaxCGIterations = 30000;
|
|
||||||
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
|
||||||
|
|
||||||
bool ApplySmearing = true;
|
|
||||||
|
|
||||||
////////////////////////////////////
|
|
||||||
// Collect actions
|
|
||||||
////////////////////////////////////
|
|
||||||
ActionLevel<HMCWrapper::Field> Level1(1);
|
|
||||||
ActionLevel<HMCWrapper::Field> Level2(2);
|
|
||||||
ActionLevel<HMCWrapper::Field> Level3(4);
|
|
||||||
|
|
||||||
////////////////////////////////////
|
|
||||||
// Strange action
|
|
||||||
////////////////////////////////////
|
|
||||||
|
|
||||||
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
|
|
||||||
MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
|
|
||||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
|
|
||||||
EOFA(Strange_Op_L, Strange_Op_R,
|
|
||||||
CG,
|
|
||||||
CG, CG,
|
|
||||||
CG, CG,
|
|
||||||
OFRp, false);
|
|
||||||
|
|
||||||
EOFA.is_smeared = ApplySmearing;
|
|
||||||
Level1.push_back(&EOFA);
|
|
||||||
|
|
||||||
////////////////////////////////////
|
|
||||||
// up down action
|
|
||||||
////////////////////////////////////
|
|
||||||
std::vector<Real> light_den;
|
|
||||||
std::vector<Real> light_num;
|
|
||||||
|
|
||||||
int n_hasenbusch = hasenbusch.size();
|
|
||||||
light_den.push_back(light_mass);
|
|
||||||
for(int h=0;h<n_hasenbusch;h++){
|
|
||||||
light_den.push_back(hasenbusch[h]);
|
|
||||||
light_num.push_back(hasenbusch[h]);
|
|
||||||
}
|
|
||||||
light_num.push_back(pv_mass);
|
|
||||||
|
|
||||||
std::vector<FermionAction *> Numerators;
|
|
||||||
std::vector<FermionAction *> Denominators;
|
|
||||||
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
|
||||||
|
|
||||||
for(int h=0;h<n_hasenbusch+1;h++){
|
|
||||||
std::cout << GridLogMessage << " 2f quotient Action "<< light_num[h] << " / " << light_den[h]<< std::endl;
|
|
||||||
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
|
|
||||||
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
|
|
||||||
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int h=0;h<n_hasenbusch+1;h++){
|
|
||||||
Quotients[h]->is_smeared = ApplySmearing;
|
|
||||||
Level1.push_back(Quotients[h]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
|
||||||
// lnDetJacobianAction
|
|
||||||
/////////////////////////////////////////////////////////////
|
|
||||||
double rho = 0.1; // smearing parameter
|
|
||||||
int Nsmear = 1; // number of smearing levels - must be multiple of 2Nd
|
|
||||||
int Nstep = 8*Nsmear; // number of smearing levels - must be multiple of 2Nd
|
|
||||||
Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
|
|
||||||
SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
|
|
||||||
JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
|
|
||||||
if( ApplySmearing ) Level2.push_back(&Jacobian);
|
|
||||||
std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
|
||||||
// Gauge action
|
|
||||||
/////////////////////////////////////////////////////////////
|
|
||||||
// GaugeAction.is_smeared = ApplySmearing;
|
|
||||||
GaugeAction.is_smeared = true;
|
|
||||||
Level3.push_back(&GaugeAction);
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << " ************************************************"<< std::endl;
|
|
||||||
std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
|
|
||||||
std::cout << GridLogMessage << " ************************************************"<< std::endl;
|
|
||||||
std::cout << GridLogMessage << std::endl;
|
|
||||||
std::cout << GridLogMessage << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
|
|
||||||
|
|
||||||
TheHMC.TheAction.push_back(Level1);
|
|
||||||
TheHMC.TheAction.push_back(Level2);
|
|
||||||
TheHMC.TheAction.push_back(Level3);
|
|
||||||
|
|
||||||
TheHMC.Run(SmearingPolicy); // for smearing
|
|
||||||
|
|
||||||
Grid_finalize();
|
|
||||||
} // main
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -146,8 +146,6 @@ NAMESPACE_END(Grid);
|
|||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
std::cout << " Grid Initialise "<<std::endl;
|
|
||||||
|
|
||||||
Grid_init(&argc, &argv);
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
CartesianCommunicator::BarrierWorld();
|
CartesianCommunicator::BarrierWorld();
|
||||||
@ -172,24 +170,24 @@ int main(int argc, char **argv) {
|
|||||||
IntegratorParameters MD;
|
IntegratorParameters MD;
|
||||||
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
||||||
// MD.name = std::string("Leap Frog");
|
// MD.name = std::string("Leap Frog");
|
||||||
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
||||||
// MD.name = std::string("Force Gradient");
|
MD.name = std::string("Force Gradient");
|
||||||
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
//typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
MD.name = std::string("MinimumNorm2");
|
// MD.name = std::string("MinimumNorm2");
|
||||||
// TrajL = 2
|
// TrajL = 2
|
||||||
// 4/2 => 0.6 dH
|
// 4/2 => 0.6 dH
|
||||||
// 3/3 => 0.8 dH .. depth 3, slower
|
// 3/3 => 0.8 dH .. depth 3, slower
|
||||||
//MD.MDsteps = 4;
|
//MD.MDsteps = 4;
|
||||||
MD.MDsteps = 14;
|
MD.MDsteps = 12;
|
||||||
MD.trajL = 0.5;
|
MD.trajL = 0.5;
|
||||||
|
|
||||||
HMCparameters HMCparams;
|
HMCparameters HMCparams;
|
||||||
HMCparams.StartTrajectory = 1077;
|
HMCparams.StartTrajectory = 1077;
|
||||||
HMCparams.Trajectories = 20;
|
HMCparams.Trajectories = 1;
|
||||||
HMCparams.NoMetropolisUntil= 0;
|
HMCparams.NoMetropolisUntil= 0;
|
||||||
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
HMCparams.StartingType =std::string("ColdStart");
|
// HMCparams.StartingType =std::string("ColdStart");
|
||||||
// HMCparams.StartingType =std::string("CheckpointStart");
|
HMCparams.StartingType =std::string("CheckpointStart");
|
||||||
HMCparams.MD = MD;
|
HMCparams.MD = MD;
|
||||||
HMCWrapper TheHMC(HMCparams);
|
HMCWrapper TheHMC(HMCparams);
|
||||||
|
|
||||||
@ -225,7 +223,7 @@ int main(int argc, char **argv) {
|
|||||||
Real pv_mass = 1.0;
|
Real pv_mass = 1.0;
|
||||||
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated
|
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
||||||
|
|
||||||
auto GridPtr = TheHMC.Resources.GetCartesian();
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
@ -277,10 +275,10 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
// double StoppingCondition = 1e-14;
|
// double StoppingCondition = 1e-14;
|
||||||
// double MDStoppingCondition = 1e-9;
|
// double MDStoppingCondition = 1e-9;
|
||||||
double StoppingCondition = 1e-9;
|
double StoppingCondition = 1e-8;
|
||||||
double MDStoppingCondition = 1e-8;
|
double MDStoppingCondition = 1e-7;
|
||||||
double MDStoppingConditionLoose = 1e-8;
|
double MDStoppingConditionLoose = 1e-7;
|
||||||
double MDStoppingConditionStrange = 1e-8;
|
double MDStoppingConditionStrange = 1e-7;
|
||||||
double MaxCGIterations = 300000;
|
double MaxCGIterations = 300000;
|
||||||
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||||
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
@ -10,8 +10,9 @@ For first time setup of the Xcode and Grid build environment on Mac OS, you will
|
|||||||
|
|
||||||
1. Install Xcode and the Xcode command-line utilities
|
1. Install Xcode and the Xcode command-line utilities
|
||||||
2. Set Grid environment variables
|
2. Set Grid environment variables
|
||||||
3. Install and build Grid pre-requisites
|
3. Install and build Open MPI ***optional***
|
||||||
4. Install, Configure and Build Grid
|
4. Install and build Grid pre-requisites
|
||||||
|
5. Install, Configure and Build Grid
|
||||||
|
|
||||||
Apple's [Xcode website][Xcode] is the go-to reference for 1, and the definitive reference for 4 and 5 is the [Grid Documentation][GridDoc].
|
Apple's [Xcode website][Xcode] is the go-to reference for 1, and the definitive reference for 4 and 5 is the [Grid Documentation][GridDoc].
|
||||||
|
|
||||||
@ -91,33 +92,60 @@ launchctl setenv GridPkg /opt/local</string>
|
|||||||
</plist>
|
</plist>
|
||||||
```
|
```
|
||||||
|
|
||||||
## 3. Install and build Grid pre-requisites
|
## 3. Install and build Open MPI -- ***optional***
|
||||||
|
|
||||||
|
Download the latest version of [Open MPI][OMPI] version 3.1 (I used 3.1.5) and build it like so:
|
||||||
|
|
||||||
|
[OMPI]: https://www.open-mpi.org/software/ompi/v3.1/
|
||||||
|
|
||||||
|
../configure CC=clang CXX=clang++ CXXFLAGS=-g --prefix=$GridPre/bin
|
||||||
|
make -j 4 all install
|
||||||
|
|
||||||
|
***Note the `/bin` at the end of the prefix - this is required. As a quirk of the OpenMPI installer, `--prefix` must point to the `bin` subdirectory, with other files installed in `$GridPre/include`, `$GridPre/lib`, `$GridPre/share`, etc.***
|
||||||
|
|
||||||
|
Grid does not have any dependencies on fortran, however many standard scientific packages do, so you may wish to download GNU fortran (e.g. MacPorts ``gfortran`` package) and add the following to your configure invocation:
|
||||||
|
|
||||||
|
F77=gfortran FC=gfortran
|
||||||
|
|
||||||
|
## 4. Install and build Grid pre-requisites
|
||||||
|
|
||||||
To simplify the installation of **Grid pre-requisites**, you can use your favourite package manager, e.g.:
|
To simplify the installation of **Grid pre-requisites**, you can use your favourite package manager, e.g.:
|
||||||
|
|
||||||
### 3.1. [MacPorts][MacPorts]
|
### 1. [MacPorts][MacPorts]
|
||||||
|
|
||||||
[MacPorts]: https://www.macports.org "MacPorts package manager"
|
[MacPorts]: https://www.macports.org "MacPorts package manager"
|
||||||
|
|
||||||
Install [MacPorts][MacPorts] if you haven't done so already, and then install packages with:
|
Install [MacPorts][MacPorts] if you haven't done so already, and then install packages with:
|
||||||
|
|
||||||
sudo port install openmpi git-flow-avh gmp hdf5 mpfr fftw-3-single lapack wget autoconf automake bison cmake gawk libomp
|
sudo port install <portname>
|
||||||
|
|
||||||
On a Mac without GPUs:
|
These are the `portname`s for mandatory Grid libraries:
|
||||||
|
|
||||||
sudo port install OpenBLAS +native
|
* git-flow-avh
|
||||||
|
* gmp
|
||||||
|
* hdf5
|
||||||
|
* mpfr
|
||||||
|
|
||||||
To use `Gnu sha256sum`:
|
and these are the `portname`s for optional Grid libraries:
|
||||||
|
|
||||||
pushd /opt/local/bin; sudo ln -s gsha256sum sha256sum; popd
|
* fftw-3-single
|
||||||
|
* lapack
|
||||||
|
* doxygen
|
||||||
|
* OpenBLAS
|
||||||
|
|
||||||
These `port`s are not strictly necessary, but they are helpful:
|
***Please update this list with any packages I've missed! ... and double-check whether OpenBLAS is really for Grid. NB: lapack doesn't seem to work. Should it be scalapack?***
|
||||||
|
|
||||||
sudo port install gnuplot gsl h5utils nasm rclone texinfo tree xorg-server
|
### 2. [Homebrew][Homebrew]
|
||||||
|
|
||||||
***Please update this list with any packages I've missed!***
|
[Homebrew]: https://brew.sh "Homebrew package manager"
|
||||||
|
|
||||||
#### Install LIME
|
Install [Homebrew][Homebrew] if you haven't done so already, and then install packages with:
|
||||||
|
|
||||||
|
sudo brew install <packagename>
|
||||||
|
|
||||||
|
The same packages are available as from MacPorts.
|
||||||
|
|
||||||
|
### Install LIME ***optional***
|
||||||
|
|
||||||
There isn't currently a port for [C-LIME][C-LIME], so download the source and then build it:
|
There isn't currently a port for [C-LIME][C-LIME], so download the source and then build it:
|
||||||
|
|
||||||
@ -126,19 +154,9 @@ There isn't currently a port for [C-LIME][C-LIME], so download the source and th
|
|||||||
../configure CC=clang --prefix=$GridPre
|
../configure CC=clang --prefix=$GridPre
|
||||||
make -j 4 all install
|
make -j 4 all install
|
||||||
|
|
||||||
### 3.2. [Homebrew][Homebrew]
|
## 5. Install, Configure and Build Grid
|
||||||
|
|
||||||
[Homebrew]: https://brew.sh "Homebrew package manager"
|
### 5.1 Install Grid
|
||||||
|
|
||||||
Install [Homebrew][Homebrew] if you haven't done so already, and then install packages with:
|
|
||||||
|
|
||||||
sudo brew install <packagename>
|
|
||||||
|
|
||||||
I don't use Homebrew, so I'm not sure what the Brew package name equivalents are. ** Please update if you know **
|
|
||||||
|
|
||||||
## 4. Install, Configure and Build Grid
|
|
||||||
|
|
||||||
### 4.1 Install Grid
|
|
||||||
|
|
||||||
[Grid]: https://github.com/paboyle/Grid
|
[Grid]: https://github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -156,7 +174,7 @@ or
|
|||||||
|
|
||||||
depending on how many times you like to enter your password.
|
depending on how many times you like to enter your password.
|
||||||
|
|
||||||
### 4.2 Configure Grid
|
### 5.2 Configure Grid
|
||||||
|
|
||||||
The Xcode build system supports multiple configurations for each project, by default: `Debug` and `Release`, but more configurations can be defined. We will create separate Grid build directories for each configuration, using the Grid **Autoconf** build system to make each configuration. NB: it is **not** necessary to run `make install` on them once they are built (IDE features such as *jump to definition* will work better of you don't).
|
The Xcode build system supports multiple configurations for each project, by default: `Debug` and `Release`, but more configurations can be defined. We will create separate Grid build directories for each configuration, using the Grid **Autoconf** build system to make each configuration. NB: it is **not** necessary to run `make install` on them once they are built (IDE features such as *jump to definition* will work better of you don't).
|
||||||
|
|
||||||
@ -180,7 +198,7 @@ Debug configuration with MPI:
|
|||||||
|
|
||||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --prefix=$GridPre/MPIDebug
|
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --prefix=$GridPre/MPIDebug
|
||||||
|
|
||||||
### 4.3 Build Grid
|
### 5.3 Build Grid
|
||||||
|
|
||||||
Each configuration must be built before they can be used. You can either:
|
Each configuration must be built before they can be used. You can either:
|
||||||
|
|
||||||
|
@ -1,44 +0,0 @@
|
|||||||
#!/bin/bash -l
|
|
||||||
#SBATCH --job-name=bench_lehner
|
|
||||||
#SBATCH --partition=small-g
|
|
||||||
#SBATCH --nodes=2
|
|
||||||
#SBATCH --ntasks-per-node=8
|
|
||||||
#SBATCH --cpus-per-task=7
|
|
||||||
#SBATCH --gpus-per-node=8
|
|
||||||
#SBATCH --time=00:10:00
|
|
||||||
#SBATCH --account=project_465000546
|
|
||||||
#SBATCH --gpu-bind=none
|
|
||||||
#SBATCH --exclusive
|
|
||||||
#SBATCH --mem=0
|
|
||||||
|
|
||||||
CPU_BIND="map_cpu:48,56,32,40,16,24,1,8"
|
|
||||||
echo $CPU_BIND
|
|
||||||
|
|
||||||
cat << EOF > select_gpu
|
|
||||||
#!/bin/bash
|
|
||||||
export GPU_MAP=(0 1 2 3 4 5 6 7)
|
|
||||||
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
|
|
||||||
export HIP_VISIBLE_DEVICES=\$GPU
|
|
||||||
unset ROCR_VISIBLE_DEVICES
|
|
||||||
echo RANK \$SLURM_LOCALID using GPU \$GPU
|
|
||||||
exec \$*
|
|
||||||
EOF
|
|
||||||
|
|
||||||
chmod +x ./select_gpu
|
|
||||||
|
|
||||||
root=/scratch/project_465000546/boylepet/Grid/systems/Lumi
|
|
||||||
source ${root}/sourceme.sh
|
|
||||||
|
|
||||||
export OMP_NUM_THREADS=7
|
|
||||||
export MPICH_GPU_SUPPORT_ENABLED=1
|
|
||||||
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
|
|
||||||
|
|
||||||
for vol in 16.16.16.64 32.32.32.64 32.32.32.128
|
|
||||||
do
|
|
||||||
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
|
|
||||||
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
|
|
||||||
|
|
||||||
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
|
|
||||||
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
|
|
||||||
done
|
|
||||||
|
|
@ -3,28 +3,30 @@ spack load gmp
|
|||||||
spack load mpfr
|
spack load mpfr
|
||||||
CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
|
CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
|
||||||
GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
|
GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
|
||||||
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 13-`
|
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 12-`
|
||||||
echo clime X$CLIME
|
echo clime $CLIME
|
||||||
echo gmp X$GMP
|
echo gmp $GMP
|
||||||
echo mpfr X$MPFR
|
echo mpfr $MPFR
|
||||||
|
|
||||||
../../configure \
|
../../configure --enable-comms=mpi-auto \
|
||||||
--enable-comms=mpi-auto \
|
|
||||||
--with-lime=$CLIME \
|
--with-lime=$CLIME \
|
||||||
--enable-unified=no \
|
--enable-unified=no \
|
||||||
--enable-shm=nvlink \
|
--enable-shm=nvlink \
|
||||||
|
--enable-tracing=timer \
|
||||||
--enable-accelerator=hip \
|
--enable-accelerator=hip \
|
||||||
--enable-gen-simd-width=64 \
|
--enable-gen-simd-width=64 \
|
||||||
--enable-simd=GPU \
|
--enable-simd=GPU \
|
||||||
--enable-accelerator-cshift \
|
--disable-accelerator-cshift \
|
||||||
--with-gmp=$GMP \
|
--with-gmp=$OLCF_GMP_ROOT \
|
||||||
--with-mpfr=$MPFR \
|
|
||||||
--with-fftw=$FFTW_DIR/.. \
|
--with-fftw=$FFTW_DIR/.. \
|
||||||
|
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
|
||||||
--disable-fermion-reps \
|
--disable-fermion-reps \
|
||||||
--disable-gparity \
|
--disable-gparity \
|
||||||
CXX=hipcc MPICXX=mpicxx \
|
CXX=hipcc MPICXX=mpicxx \
|
||||||
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
|
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 --amdgpu-target=gfx90a" \
|
||||||
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"
|
LDFLAGS="-L/lib64 -L/opt/rocm-5.2.0/lib/ -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 --amdgpu-target=gfx90a "
|
||||||
|
|
||||||
|
|
||||||
|
#--enable-simd=GPU-RRII \
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1 @@
|
|||||||
source ~/spack/share/spack/setup-env.sh
|
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1
|
||||||
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 rocm
|
|
||||||
spack load c-lime
|
|
||||||
spack load gmp
|
|
||||||
spack load mpfr
|
|
||||||
|
@ -1,53 +0,0 @@
|
|||||||
1. Prerequisites:
|
|
||||||
===================
|
|
||||||
Make sure you have the latest Intel ipcx release loaded (via modules or similar)
|
|
||||||
Make sure you have SYCL aware MPICH or Intel MPI loaded (assumed as mpicxx)
|
|
||||||
|
|
||||||
2. Obtain Grid:
|
|
||||||
===================
|
|
||||||
|
|
||||||
bash$
|
|
||||||
git clone https://github.com/paboyle/Grid
|
|
||||||
cd Grid
|
|
||||||
./bootstrap.sh
|
|
||||||
cd systems/PVC
|
|
||||||
|
|
||||||
3. Build Grid:
|
|
||||||
===================
|
|
||||||
|
|
||||||
Here, configure command is stored in file config-command:
|
|
||||||
|
|
||||||
bash$
|
|
||||||
../../configure \
|
|
||||||
--enable-simd=GPU \
|
|
||||||
--enable-gen-simd-width=64 \
|
|
||||||
--enable-comms=mpi-auto \
|
|
||||||
--enable-accelerator-cshift \
|
|
||||||
--disable-gparity \
|
|
||||||
--disable-fermion-reps \
|
|
||||||
--enable-shm=nvlink \
|
|
||||||
--enable-accelerator=sycl \
|
|
||||||
--enable-unified=no \
|
|
||||||
MPICXX=mpicxx \
|
|
||||||
CXX=icpx \
|
|
||||||
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
|
|
||||||
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "
|
|
||||||
|
|
||||||
make all
|
|
||||||
|
|
||||||
4. Run a benchmark:
|
|
||||||
===================
|
|
||||||
|
|
||||||
*** Assumes interactive access to node. ***
|
|
||||||
|
|
||||||
run Benchmark_dwf_fp32 using benchmarks/bench.sh
|
|
||||||
|
|
||||||
bash$
|
|
||||||
cd benchmarks
|
|
||||||
./bench.sh
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
export EnableImplicitScaling=0
|
|
||||||
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
|
|
||||||
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
|
|
||||||
export ONEAPI_DEVICE_FILTER=gpu,level_zero
|
|
||||||
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
|
|
||||||
|
|
||||||
mpiexec -launcher ssh -n 1 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 1tile.log
|
|
||||||
mpiexec -launcher ssh -n 2 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 2tile.log
|
|
||||||
|
|
||||||
#mpiexec -launcher ssh -n 4 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 16.16.64.64 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 4tile.log
|
|
||||||
#mpiexec -launcher ssh -n 8 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.4 --grid 16.16.64.128 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 8tile.log
|
|
||||||
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
num_tile=2
|
|
||||||
|
|
||||||
gpu_id=$(( (MPI_LOCAL_RANKID % num_tile ) ))
|
|
||||||
tile_id=$((MPI_LOCAL_RANKID / num_tile))
|
|
||||||
|
|
||||||
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
|
|
||||||
|
|
||||||
echo "local rank $MPI_LOCALRANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
|
|
||||||
|
|
||||||
"$@"
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
|||||||
../../configure \
|
|
||||||
--enable-simd=GPU \
|
|
||||||
--enable-gen-simd-width=64 \
|
|
||||||
--enable-comms=mpi-auto \
|
|
||||||
--enable-accelerator-cshift \
|
|
||||||
--disable-gparity \
|
|
||||||
--disable-fermion-reps \
|
|
||||||
--enable-shm=nvlink \
|
|
||||||
--enable-accelerator=sycl \
|
|
||||||
--enable-unified=no \
|
|
||||||
MPICXX=mpicxx \
|
|
||||||
CXX=icpx \
|
|
||||||
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
|
|
||||||
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
|||||||
export https_proxy=http://proxy-chain.intel.com:911
|
|
||||||
module load intel-release
|
|
||||||
module load intel/mpich
|
|
@ -1,46 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
#PBS -l select=1:system=sunspot,place=scatter
|
|
||||||
#PBS -A LatticeQCD_aesp_CNDA
|
|
||||||
#PBS -l walltime=01:00:00
|
|
||||||
#PBS -N dwf
|
|
||||||
#PBS -k doe
|
|
||||||
|
|
||||||
HDIR=/home/paboyle/
|
|
||||||
module use /soft/testing/modulefiles/
|
|
||||||
module load intel-UMD23.05.25593.11/23.05.25593.11
|
|
||||||
module load tools/pti-gpu
|
|
||||||
export LD_LIBRARY_PATH=$HDIR/tools/lib64:$LD_LIBRARY_PATH
|
|
||||||
export PATH=$HDIR/tools/bin:$PATH
|
|
||||||
|
|
||||||
export TZ='/usr/share/zoneinfo/US/Central'
|
|
||||||
export OMP_PROC_BIND=spread
|
|
||||||
export OMP_NUM_THREADS=3
|
|
||||||
unset OMP_PLACES
|
|
||||||
|
|
||||||
cd $PBS_O_WORKDIR
|
|
||||||
|
|
||||||
qsub jobscript.pbs
|
|
||||||
|
|
||||||
echo Jobid: $PBS_JOBID
|
|
||||||
echo Running on host `hostname`
|
|
||||||
echo Running on nodes `cat $PBS_NODEFILE`
|
|
||||||
|
|
||||||
echo NODES
|
|
||||||
cat $PBS_NODEFILE
|
|
||||||
NNODES=`wc -l < $PBS_NODEFILE`
|
|
||||||
NRANKS=12 # Number of MPI ranks per node
|
|
||||||
NDEPTH=4 # Number of hardware threads per rank, spacing between MPI ranks on a node
|
|
||||||
NTHREADS=$OMP_NUM_THREADS # Number of OMP threads per rank, given to OMP_NUM_THREADS
|
|
||||||
|
|
||||||
NTOTRANKS=$(( NNODES * NRANKS ))
|
|
||||||
|
|
||||||
echo "NUM_NODES=${NNODES} TOTAL_RANKS=${NTOTRANKS} RANKS_PER_NODE=${NRANKS} THREADS_PER_RANK=${OMP_NUM_THREADS}"
|
|
||||||
echo "OMP_PROC_BIND=$OMP_PROC_BIND OMP_PLACES=$OMP_PLACES"
|
|
||||||
|
|
||||||
|
|
||||||
CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -envall \
|
|
||||||
./gpu_tile_compact.sh \
|
|
||||||
./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \
|
|
||||||
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32"
|
|
||||||
|
|
@ -1,52 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
display_help() {
|
|
||||||
echo " Will map gpu tile to rank in compact and then round-robin fashion"
|
|
||||||
echo " Usage (only work for one node of ATS/PVC):"
|
|
||||||
echo " mpiexec --np N gpu_tile_compact.sh ./a.out"
|
|
||||||
echo
|
|
||||||
echo " Example 3 GPU of 2 Tiles with 7 Ranks:"
|
|
||||||
echo " 0 Rank 0.0"
|
|
||||||
echo " 1 Rank 0.1"
|
|
||||||
echo " 2 Rank 1.0"
|
|
||||||
echo " 3 Rank 1.1"
|
|
||||||
echo " 4 Rank 2.0"
|
|
||||||
echo " 5 Rank 2.1"
|
|
||||||
echo " 6 Rank 0.0"
|
|
||||||
echo
|
|
||||||
echo " Hacked together by apl@anl.gov, please contact if bug found"
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
#This give the exact GPU count i915 knows about and I use udev to only enumerate the devices with physical presence.
|
|
||||||
#works? num_gpu=$(/usr/bin/udevadm info /sys/module/i915/drivers/pci\:i915/* |& grep -v Unknown | grep -c "P: /devices")
|
|
||||||
num_gpu=6
|
|
||||||
num_tile=2
|
|
||||||
|
|
||||||
if [ "$#" -eq 0 ] || [ "$1" == "--help" ] || [ "$1" == "-h" ] || [ "$num_gpu" = 0 ]; then
|
|
||||||
display_help
|
|
||||||
fi
|
|
||||||
|
|
||||||
gpu_id=$(( (PALS_LOCAL_RANKID / num_tile ) % num_gpu ))
|
|
||||||
tile_id=$((PALS_LOCAL_RANKID % num_tile))
|
|
||||||
|
|
||||||
unset EnableWalkerPartition
|
|
||||||
export EnableImplicitScaling=0
|
|
||||||
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
|
|
||||||
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
|
|
||||||
export ONEAPI_DEVICE_FILTER=gpu,level_zero
|
|
||||||
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
|
|
||||||
#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
|
|
||||||
|
|
||||||
echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
|
|
||||||
|
|
||||||
if [ $PALS_LOCAL_RANKID = 0 ]
|
|
||||||
then
|
|
||||||
onetrace --chrome-device-timeline "$@"
|
|
||||||
# "$@"
|
|
||||||
else
|
|
||||||
"$@"
|
|
||||||
fi
|
|
@ -1,16 +0,0 @@
|
|||||||
TOOLS=$HOME/tools
|
|
||||||
../../configure \
|
|
||||||
--enable-simd=GPU \
|
|
||||||
--enable-gen-simd-width=64 \
|
|
||||||
--enable-comms=mpi-auto \
|
|
||||||
--enable-accelerator-cshift \
|
|
||||||
--disable-gparity \
|
|
||||||
--disable-fermion-reps \
|
|
||||||
--enable-shm=nvlink \
|
|
||||||
--enable-accelerator=sycl \
|
|
||||||
--enable-unified=no \
|
|
||||||
MPICXX=mpicxx \
|
|
||||||
CXX=icpx \
|
|
||||||
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \
|
|
||||||
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"
|
|
||||||
|
|
@ -1,307 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
grid` physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./tests/Test_cshift.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/Grid.h>
|
|
||||||
|
|
||||||
using namespace Grid;
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
|
||||||
{
|
|
||||||
Grid_init(&argc,&argv);
|
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
|
||||||
|
|
||||||
Coordinate latt_size = GridDefaultLatt();
|
|
||||||
Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
|
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
|
||||||
|
|
||||||
int vol = 1;
|
|
||||||
for(int d=0;d<latt_size.size();d++){
|
|
||||||
vol = vol * latt_size[d];
|
|
||||||
}
|
|
||||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
|
||||||
GridRedBlackCartesian RBGRID(&GRID);
|
|
||||||
|
|
||||||
ComplexD ci(0.0,1.0);
|
|
||||||
|
|
||||||
std::vector<int> seeds({1,2,3,4});
|
|
||||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds); // naughty seeding
|
|
||||||
GridParallelRNG pRNG(&GRID);
|
|
||||||
pRNG.SeedFixedIntegers(seeds);
|
|
||||||
|
|
||||||
LatticeGaugeFieldD Umu(&GRID);
|
|
||||||
|
|
||||||
SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
|
||||||
// PF prop
|
|
||||||
////////////////////////////////////////////////////
|
|
||||||
LatticeFermionD src(&GRID);
|
|
||||||
|
|
||||||
gaussian(pRNG,src);
|
|
||||||
#if 1
|
|
||||||
Coordinate point(4,0);
|
|
||||||
src=Zero();
|
|
||||||
SpinColourVectorD ferm; gaussian(sRNG,ferm);
|
|
||||||
pokeSite(ferm,src,point);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
{
|
|
||||||
std::cout<<"****************************************"<<std::endl;
|
|
||||||
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator \n";
|
|
||||||
std::cout<<"****************************************"<<std::endl;
|
|
||||||
|
|
||||||
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
|
|
||||||
LatticeFermionD tmp(&GRID);
|
|
||||||
LatticeFermionD ref(&GRID);
|
|
||||||
LatticeFermionD diff(&GRID);
|
|
||||||
|
|
||||||
const int Ls=48+1;
|
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
|
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
|
||||||
RealD M5 =0.8;
|
|
||||||
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
|
|
||||||
|
|
||||||
// Momentum space prop
|
|
||||||
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
|
|
||||||
bool fiveD = false; //calculate 4d free propagator
|
|
||||||
|
|
||||||
std::cout << " Free propagator " <<std::endl;
|
|
||||||
Dov.FreePropagator(src,ref,mass) ;
|
|
||||||
std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
|
|
||||||
|
|
||||||
Gamma G5(Gamma::Algebra::Gamma5);
|
|
||||||
|
|
||||||
LatticeFermionD src5(FGrid); src5=Zero();
|
|
||||||
LatticeFermionD tmp5(FGrid);
|
|
||||||
LatticeFermionD result5(FGrid); result5=Zero();
|
|
||||||
LatticeFermionD result4(&GRID);
|
|
||||||
const int sdir=0;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Import
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
|
|
||||||
Dov.ImportPhysicalFermionSource (src,src5);
|
|
||||||
std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Conjugate gradient on normal equations system
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
|
|
||||||
Dov.Mdag(src5,tmp5);
|
|
||||||
src5=tmp5;
|
|
||||||
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
|
|
||||||
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
|
||||||
CG(HermOp,src5,result5);
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Domain wall physical field propagator
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
Dov.ExportPhysicalFermionSolution(result5,result4);
|
|
||||||
|
|
||||||
// From DWF4d.pdf :
|
|
||||||
//
|
|
||||||
// Dov_pf = 2/(1-m) D_cayley_ovlap [ Page 43 ]
|
|
||||||
// Dinv_cayley_ovlap = 2/(1-m) Dinv_pf
|
|
||||||
// Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) => 2/(1-m)^2 Dinv_pf - 1/(1-m) * src [ Eq.2.67 ]
|
|
||||||
|
|
||||||
RealD scale = 2.0/(1.0-mass)/(1.0-mass);
|
|
||||||
result4 = result4 * scale;
|
|
||||||
result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
|
|
||||||
DumpSliceNorm("Src",src);
|
|
||||||
DumpSliceNorm("Grid",result4);
|
|
||||||
DumpSliceNorm("Fourier",ref);
|
|
||||||
|
|
||||||
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
|
|
||||||
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
|
|
||||||
|
|
||||||
diff = result4- ref;
|
|
||||||
DumpSliceNorm("diff ",diff);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
|
||||||
// Dwf prop
|
|
||||||
////////////////////////////////////////////////////
|
|
||||||
{
|
|
||||||
std::cout<<"****************************************"<<std::endl;
|
|
||||||
std::cout << "Testing Dov(Hw) Mom space 4d propagator \n";
|
|
||||||
std::cout<<"****************************************"<<std::endl;
|
|
||||||
|
|
||||||
LatticeFermionD tmp(&GRID);
|
|
||||||
LatticeFermionD ref(&GRID);
|
|
||||||
LatticeFermionD diff(&GRID);
|
|
||||||
|
|
||||||
const int Ls=48;
|
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
|
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
|
||||||
RealD M5 =0.8;
|
|
||||||
|
|
||||||
OverlapWilsonCayleyTanhFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,1.0);
|
|
||||||
|
|
||||||
// Momentum space prop
|
|
||||||
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
|
|
||||||
Dov.FreePropagator(src,ref,mass) ;
|
|
||||||
|
|
||||||
Gamma G5(Gamma::Algebra::Gamma5);
|
|
||||||
|
|
||||||
LatticeFermionD src5(FGrid); src5=Zero();
|
|
||||||
LatticeFermionD tmp5(FGrid);
|
|
||||||
LatticeFermionD result5(FGrid); result5=Zero();
|
|
||||||
LatticeFermionD result4(&GRID);
|
|
||||||
const int sdir=0;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Domain wall physical field source; need D_minus
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
/*
|
|
||||||
chi_5[0] = chiralProjectPlus(chi);
|
|
||||||
chi_5[Ls-1]= chiralProjectMinus(chi);
|
|
||||||
*/
|
|
||||||
tmp = (src + G5*src)*0.5; InsertSlice(tmp,src5, 0,sdir);
|
|
||||||
tmp = (src - G5*src)*0.5; InsertSlice(tmp,src5,Ls-1,sdir);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Conjugate gradient on normal equations system
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
|
|
||||||
Dov.Dminus(src5,tmp5);
|
|
||||||
src5=tmp5;
|
|
||||||
Dov.Mdag(src5,tmp5);
|
|
||||||
src5=tmp5;
|
|
||||||
MdagMLinearOperator<OverlapWilsonCayleyTanhFermionD,LatticeFermionD> HermOp(Dov);
|
|
||||||
ConjugateGradient<LatticeFermionD> CG(1.0e-16,10000);
|
|
||||||
CG(HermOp,src5,result5);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Domain wall physical field propagator
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
/*
|
|
||||||
psi = chiralProjectMinus(psi_5[0]);
|
|
||||||
psi += chiralProjectPlus(psi_5[Ls-1]);
|
|
||||||
*/
|
|
||||||
ExtractSlice(tmp,result5,0 ,sdir); result4 = (tmp-G5*tmp)*0.5;
|
|
||||||
ExtractSlice(tmp,result5,Ls-1,sdir); result4 = result4+(tmp+G5*tmp)*0.5;
|
|
||||||
|
|
||||||
std::cout << " Taking difference" <<std::endl;
|
|
||||||
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
|
|
||||||
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
|
|
||||||
DumpSliceNorm("Grid",result4);
|
|
||||||
DumpSliceNorm("Fourier",ref);
|
|
||||||
diff = ref - result4;
|
|
||||||
std::cout << "result - ref "<<norm2(diff)<<std::endl;
|
|
||||||
|
|
||||||
DumpSliceNorm("diff",diff);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
{
|
|
||||||
std::cout<<"****************************************"<<std::endl;
|
|
||||||
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator with q\n";
|
|
||||||
std::cout<<"****************************************"<<std::endl;
|
|
||||||
|
|
||||||
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
|
|
||||||
LatticeFermionD tmp(&GRID);
|
|
||||||
LatticeFermionD ref(&GRID);
|
|
||||||
LatticeFermionD diff(&GRID);
|
|
||||||
|
|
||||||
const int Ls=48+1;
|
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
|
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
|
||||||
RealD M5 =0.8;
|
|
||||||
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
|
|
||||||
|
|
||||||
// Momentum space prop
|
|
||||||
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
|
|
||||||
bool fiveD = false; //calculate 4d free propagator
|
|
||||||
|
|
||||||
std::cout << " Free propagator " <<std::endl;
|
|
||||||
Dov.FreePropagator(src,ref,mass) ;
|
|
||||||
std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
|
|
||||||
|
|
||||||
Gamma G5(Gamma::Algebra::Gamma5);
|
|
||||||
|
|
||||||
LatticeFermionD src5(FGrid); src5=Zero();
|
|
||||||
LatticeFermionD tmp5(FGrid);
|
|
||||||
LatticeFermionD result5(FGrid); result5=Zero();
|
|
||||||
LatticeFermionD result4(&GRID);
|
|
||||||
const int sdir=0;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Import
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
|
|
||||||
Dov.ImportPhysicalFermionSource (src,src5);
|
|
||||||
std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Conjugate gradient on normal equations system
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
|
|
||||||
Dov.Mdag(src5,tmp5);
|
|
||||||
src5=tmp5;
|
|
||||||
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
|
|
||||||
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
|
||||||
CG(HermOp,src5,result5);
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Domain wall physical field propagator
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
Dov.ExportPhysicalFermionSolution(result5,result4);
|
|
||||||
|
|
||||||
// From DWF4d.pdf :
|
|
||||||
//
|
|
||||||
// Dov_pf = 2/(1-m) D_cayley_ovlap [ Page 43 ]
|
|
||||||
// Dinv_cayley_ovlap = 2/(1-m) Dinv_pf
|
|
||||||
// Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) => 2/(1-m)^2 Dinv_pf - 1/(1-m) * src [ Eq.2.67 ]
|
|
||||||
|
|
||||||
RealD scale = 2.0/(1.0-mass)/(1.0-mass);
|
|
||||||
result4 = result4 * scale;
|
|
||||||
result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
|
|
||||||
DumpSliceNorm("Src",src);
|
|
||||||
DumpSliceNorm("Grid",result4);
|
|
||||||
DumpSliceNorm("Fourier",ref);
|
|
||||||
|
|
||||||
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
|
|
||||||
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
|
|
||||||
|
|
||||||
diff = result4- ref;
|
|
||||||
DumpSliceNorm("diff ",diff);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Grid_finalize();
|
|
||||||
}
|
|
@ -1,188 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./tests/Test_iwasaki_action_newstaple.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
|
|
||||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/Grid.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
using namespace Grid;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// PlaqPlusRectangleActoin
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
template<class Gimpl>
|
|
||||||
class PlaqPlusRectangleActionOrig : public Action<typename Gimpl::GaugeField> {
|
|
||||||
public:
|
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
|
|
||||||
private:
|
|
||||||
RealD c_plaq;
|
|
||||||
RealD c_rect;
|
|
||||||
|
|
||||||
public:
|
|
||||||
PlaqPlusRectangleActionOrig(RealD b,RealD c): c_plaq(b),c_rect(c){};
|
|
||||||
|
|
||||||
virtual std::string action_name(){return "PlaqPlusRectangleActionOrig";}
|
|
||||||
|
|
||||||
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
|
|
||||||
|
|
||||||
virtual std::string LogParameters(){
|
|
||||||
std::stringstream sstream;
|
|
||||||
sstream << GridLogMessage << "["<<action_name() <<"] c_plaq: " << c_plaq << std::endl;
|
|
||||||
sstream << GridLogMessage << "["<<action_name() <<"] c_rect: " << c_rect << std::endl;
|
|
||||||
return sstream.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
virtual RealD S(const GaugeField &U) {
|
|
||||||
RealD vol = U.Grid()->gSites();
|
|
||||||
|
|
||||||
RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
|
|
||||||
RealD rect = WilsonLoops<Gimpl>::avgRectangle(U);
|
|
||||||
|
|
||||||
RealD action=c_plaq*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5
|
|
||||||
+c_rect*(1.0 -rect)*(Nd*(Nd-1.0))*vol;
|
|
||||||
|
|
||||||
return action;
|
|
||||||
};
|
|
||||||
|
|
||||||
virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) {
|
|
||||||
//extend Ta to include Lorentz indexes
|
|
||||||
RealD factor_p = c_plaq/RealD(Nc)*0.5;
|
|
||||||
RealD factor_r = c_rect/RealD(Nc)*0.5;
|
|
||||||
|
|
||||||
GridBase *grid = Umu.Grid();
|
|
||||||
|
|
||||||
std::vector<GaugeLinkField> U (Nd,grid);
|
|
||||||
std::vector<GaugeLinkField> U2(Nd,grid);
|
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
GaugeLinkField dSdU_mu(grid);
|
|
||||||
GaugeLinkField staple(grid);
|
|
||||||
|
|
||||||
for (int mu=0; mu < Nd; mu++){
|
|
||||||
|
|
||||||
// Staple in direction mu
|
|
||||||
|
|
||||||
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
|
|
||||||
|
|
||||||
dSdU_mu = Ta(U[mu]*staple)*factor_p;
|
|
||||||
|
|
||||||
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
|
|
||||||
|
|
||||||
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
|
|
||||||
|
|
||||||
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
// Convenience for common physically defined cases.
|
|
||||||
//
|
|
||||||
// RBC c1 parameterisation is not really RBC but don't have good
|
|
||||||
// reference and we are happy to change name if prior use of this plaq coeff
|
|
||||||
// parameterisation is made known to us.
|
|
||||||
template<class Gimpl>
|
|
||||||
class RBCGaugeActionOrig : public PlaqPlusRectangleActionOrig<Gimpl> {
|
|
||||||
public:
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
RBCGaugeActionOrig(RealD beta,RealD c1) : PlaqPlusRectangleActionOrig<Gimpl>(beta*(1.0-8.0*c1), beta*c1) {};
|
|
||||||
virtual std::string action_name(){return "RBCGaugeActionOrig";}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class Gimpl>
|
|
||||||
class IwasakiGaugeActionOrig : public RBCGaugeActionOrig<Gimpl> {
|
|
||||||
public:
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
IwasakiGaugeActionOrig(RealD beta) : RBCGaugeActionOrig<Gimpl>(beta,-0.331) {};
|
|
||||||
virtual std::string action_name(){return "IwasakiGaugeActionOrig";}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
|
||||||
{
|
|
||||||
Grid_init(&argc,&argv);
|
|
||||||
|
|
||||||
Coordinate latt_size = GridDefaultLatt();
|
|
||||||
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
|
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
|
||||||
std::cout << " mpi "<<mpi_layout<<std::endl;
|
|
||||||
std::cout << " simd "<<simd_layout<<std::endl;
|
|
||||||
std::cout << " latt "<<latt_size<<std::endl;
|
|
||||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
|
||||||
|
|
||||||
GridParallelRNG pRNG(&GRID);
|
|
||||||
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
|
||||||
LatticeGaugeField U(&GRID);
|
|
||||||
|
|
||||||
SU<Nc>::HotConfiguration(pRNG,U);
|
|
||||||
|
|
||||||
//#define PRD
|
|
||||||
#ifdef PRD
|
|
||||||
typedef PeriodicGimplD Gimpl;
|
|
||||||
#else
|
|
||||||
typedef ConjugateGimplD Gimpl;
|
|
||||||
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
|
|
||||||
Gimpl::setDirections(conj_dirs);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef typename WilsonLoops<Gimpl>::GaugeMat GaugeMat;
|
|
||||||
typedef typename WilsonLoops<Gimpl>::GaugeLorentz GaugeLorentz;
|
|
||||||
|
|
||||||
GaugeLorentz derivOrig(&GRID), derivNew(&GRID);
|
|
||||||
double beta = 2.13;
|
|
||||||
IwasakiGaugeActionOrig<Gimpl> action_orig(beta);
|
|
||||||
IwasakiGaugeAction<Gimpl> action_new(beta);
|
|
||||||
|
|
||||||
double torig=0, tnew=0;
|
|
||||||
int ntest = 10;
|
|
||||||
for(int i=0;i<ntest;i++){
|
|
||||||
double t0 = usecond();
|
|
||||||
action_orig.deriv(U, derivOrig);
|
|
||||||
double t1 = usecond();
|
|
||||||
action_new.deriv(U, derivNew);
|
|
||||||
double t2 = usecond();
|
|
||||||
|
|
||||||
GaugeLorentz diff = derivOrig - derivNew;
|
|
||||||
double n = norm2(diff);
|
|
||||||
std::cout << GridLogMessage << "Difference " << n << " (expect 0)" << std::endl;
|
|
||||||
assert(n<1e-10);
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Timings orig: " << (t1-t0)/1000 << "ms, new: " << (t2-t1)/1000 << "ms" << std::endl;
|
|
||||||
torig += (t1-t0)/1000; tnew += (t2-t1)/1000;
|
|
||||||
}
|
|
||||||
std::cout << GridLogMessage << "Avg timings " << ntest << " iterations: orig:" << torig/ntest << "ms, new:" << tnew/ntest << "ms" << std::endl;
|
|
||||||
|
|
||||||
Grid_finalize();
|
|
||||||
}
|
|
@ -1,94 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./tests/Test_optimized_staple_gaugebc.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
|
|
||||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/Grid.h>
|
|
||||||
#include <Grid/lattice/PaddedCell.h>
|
|
||||||
#include <Grid/stencil/GeneralLocalStencil.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
using namespace Grid;
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
|
||||||
{
|
|
||||||
Grid_init(&argc,&argv);
|
|
||||||
|
|
||||||
Coordinate latt_size = GridDefaultLatt();
|
|
||||||
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
|
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
|
||||||
std::cout << " mpi "<<mpi_layout<<std::endl;
|
|
||||||
std::cout << " simd "<<simd_layout<<std::endl;
|
|
||||||
std::cout << " latt "<<latt_size<<std::endl;
|
|
||||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
|
||||||
|
|
||||||
GridParallelRNG pRNG(&GRID);
|
|
||||||
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
|
||||||
LatticeGaugeField U(&GRID);
|
|
||||||
|
|
||||||
SU<Nc>::HotConfiguration(pRNG,U);
|
|
||||||
|
|
||||||
//#define PRD
|
|
||||||
#ifdef PRD
|
|
||||||
typedef PeriodicGimplD Gimpl;
|
|
||||||
#else
|
|
||||||
typedef ConjugateGimplD Gimpl;
|
|
||||||
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
|
|
||||||
Gimpl::setDirections(conj_dirs);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef typename WilsonLoops<Gimpl>::GaugeMat GaugeMat;
|
|
||||||
typedef typename WilsonLoops<Gimpl>::GaugeLorentz GaugeLorentz;
|
|
||||||
|
|
||||||
int count = 0;
|
|
||||||
double torig=0, topt=0;
|
|
||||||
|
|
||||||
std::vector<GaugeMat> Umu(Nd,&GRID), U2(Nd,&GRID);
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
Umu[mu] = PeekIndex<LorentzIndex>(U,mu);
|
|
||||||
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu], Umu[mu], mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Checking optimized vs unoptimized RectStaple" << std::endl;
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
GaugeMat staple_orig(&GRID), staple_opt(&GRID), staple_U2(&GRID);
|
|
||||||
double t0 = usecond();
|
|
||||||
WilsonLoops<Gimpl>::RectStapleUnoptimised(staple_orig,U,mu);
|
|
||||||
double t1 = usecond();
|
|
||||||
WilsonLoops<Gimpl>::RectStapleOptimised(staple_opt, U2, Umu, mu);
|
|
||||||
double t2 = usecond();
|
|
||||||
torig += t1-t0; topt += t2-t1;
|
|
||||||
++count;
|
|
||||||
|
|
||||||
GaugeMat diff = staple_orig - staple_opt;
|
|
||||||
double n = norm2(diff);
|
|
||||||
std::cout << GridLogMessage << mu << " " << n << std::endl;
|
|
||||||
assert(n<1e-10);
|
|
||||||
}
|
|
||||||
std::cout << GridLogMessage << "RectStaple timings orig: " << torig/1000/count << "ms, optimized: " << topt/1000/count << "ms" << std::endl;
|
|
||||||
|
|
||||||
Grid_finalize();
|
|
||||||
}
|
|
@ -1,580 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./tests/Test_padded_cell_staple.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
|
|
||||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/Grid.h>
|
|
||||||
#include <Grid/lattice/PaddedCell.h>
|
|
||||||
#include <Grid/stencil/GeneralLocalStencil.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
using namespace Grid;
|
|
||||||
|
|
||||||
template <class Gimpl> class WilsonLoopsTest : public Gimpl {
|
|
||||||
public:
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
|
|
||||||
typedef typename Gimpl::GaugeLinkField GaugeMat;
|
|
||||||
typedef typename Gimpl::GaugeField GaugeLorentz;
|
|
||||||
|
|
||||||
|
|
||||||
//Original implementation
|
|
||||||
static void StapleOrig(GaugeMat &staple, const GaugeLorentz &Umu, int mu,
|
|
||||||
int nu) {
|
|
||||||
|
|
||||||
GridBase *grid = Umu.Grid();
|
|
||||||
|
|
||||||
std::vector<GaugeMat> U(Nd, grid);
|
|
||||||
for (int d = 0; d < Nd; d++) {
|
|
||||||
U[d] = PeekIndex<LorentzIndex>(Umu, d);
|
|
||||||
}
|
|
||||||
staple = Zero();
|
|
||||||
|
|
||||||
if (nu != mu) {
|
|
||||||
|
|
||||||
// mu
|
|
||||||
// ^
|
|
||||||
// |__> nu
|
|
||||||
|
|
||||||
// __
|
|
||||||
// |
|
|
||||||
// __|
|
|
||||||
//
|
|
||||||
|
|
||||||
//Forward: Out(x) = Link(x)*field(x+mu)
|
|
||||||
//Backward: Out(x) = Link^dag(x-mu)*field(x-mu)
|
|
||||||
//ShiftStaple: Link(x) = Link(x+mu)
|
|
||||||
|
|
||||||
//tmp1 = U^dag_nu(x-nu)
|
|
||||||
//tmp2 = U^dag_mu(x-mu) tmp1(x-mu) = U^dag_mu(x-mu) U^dag_nu(x-nu-mu)
|
|
||||||
//tmp3 = U_nu(x) tmp2(x+nu) = U_nu(x)U^dag_mu(x-mu+nu) U^dag_nu(x-mu)
|
|
||||||
//tmp4 = tmp(x+mu) = U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
|
|
||||||
|
|
||||||
staple += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftForward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))),
|
|
||||||
mu);
|
|
||||||
|
|
||||||
// __
|
|
||||||
// |
|
|
||||||
// |__
|
|
||||||
//
|
|
||||||
//
|
|
||||||
|
|
||||||
//tmp1 = U_mu^dag(x-mu) U_nu(x-mu)
|
|
||||||
//tmp2 = U_nu^dag(x-nu) tmp1(x-nu) = U_nu^dag(x-nu) U_mu^dag(x-mu-nu) U_nu(x-mu-nu)
|
|
||||||
//tmp3 = tmp2(x+mu) = U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
|
|
||||||
staple += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftBackward(U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(U[mu], mu, U[nu])),
|
|
||||||
mu);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void StaplePadded(GaugeMat &staple, const GaugeLorentz &U, int mu,
|
|
||||||
int nu) {
|
|
||||||
if(nu==mu){
|
|
||||||
staple = Zero();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
double peek = 0, construct = 0, exchange = 0, coord = 0, stencil =0, kernel = 0, extract = 0, total = 0;
|
|
||||||
|
|
||||||
double tstart = usecond();
|
|
||||||
double t=tstart;
|
|
||||||
|
|
||||||
PaddedCell Ghost(1, (GridCartesian*)U.Grid());
|
|
||||||
|
|
||||||
construct += usecond() - t;
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
GaugeMat U_mu = PeekIndex<LorentzIndex>(U, mu);
|
|
||||||
GaugeMat U_nu = PeekIndex<LorentzIndex>(U, nu);
|
|
||||||
peek += usecond() - t;
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
CshiftImplGauge<Gimpl> cshift_impl;
|
|
||||||
GaugeMat Ug_mu = Ghost.Exchange(U_mu, cshift_impl);
|
|
||||||
GaugeMat Ug_nu = Ghost.Exchange(U_nu, cshift_impl);
|
|
||||||
exchange += usecond() - t;
|
|
||||||
|
|
||||||
GridBase *ggrid = Ug_mu.Grid();
|
|
||||||
|
|
||||||
GaugeMat gStaple(ggrid);
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
Coordinate shift_0(Nd,0);
|
|
||||||
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
|
|
||||||
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
|
|
||||||
Coordinate shift_mnu(Nd,0); shift_mnu[nu]=-1;
|
|
||||||
Coordinate shift_mnu_pmu(Nd,0); shift_mnu_pmu[nu]=-1; shift_mnu_pmu[mu]=1;
|
|
||||||
|
|
||||||
std::vector<Coordinate> shifts;
|
|
||||||
|
|
||||||
//U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
|
|
||||||
shifts.push_back(shift_0);
|
|
||||||
shifts.push_back(shift_nu);
|
|
||||||
shifts.push_back(shift_mu);
|
|
||||||
|
|
||||||
//U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
|
|
||||||
shifts.push_back(shift_mnu);
|
|
||||||
shifts.push_back(shift_mnu);
|
|
||||||
shifts.push_back(shift_mnu_pmu);
|
|
||||||
coord += usecond()-t;
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
GeneralLocalStencil gStencil(ggrid,shifts);
|
|
||||||
stencil += usecond() -t;
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
{
|
|
||||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
|
||||||
auto gStencil_v = gStencil.View();
|
|
||||||
autoView( Ug_mu_v , Ug_mu, AcceleratorRead);
|
|
||||||
autoView( Ug_nu_v , Ug_nu, AcceleratorRead);
|
|
||||||
|
|
||||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
|
||||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(0,ss);
|
|
||||||
auto Udag_nu_x = adj(coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(1,ss);
|
|
||||||
auto Udag_mu_xpnu = adj(coalescedReadGeneralPermute(Ug_mu_v[e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(2,ss);
|
|
||||||
auto U_nu_xpmu = coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
auto stencil_ss = U_nu_xpmu * Udag_mu_xpnu * Udag_nu_x;
|
|
||||||
|
|
||||||
e = gStencil_v.GetEntry(3,ss);
|
|
||||||
auto U_nu_xmnu = coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(4,ss);
|
|
||||||
auto Udag_mu_xmnu = adj(coalescedReadGeneralPermute(Ug_mu_v[e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(5,ss);
|
|
||||||
auto Udag_nu_xmnu_pmu = adj(coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd));
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + Udag_nu_xmnu_pmu * Udag_mu_xmnu * U_nu_xmnu;
|
|
||||||
|
|
||||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
} //ensure views are all closed!
|
|
||||||
kernel += usecond() - t;
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
staple = Ghost.Extract(gStaple);
|
|
||||||
extract += usecond()-t;
|
|
||||||
|
|
||||||
total += usecond() - tstart;
|
|
||||||
std::cout << GridLogMessage << "StaplePadded timings peek:" << peek << " construct:" << construct << " exchange:" << exchange << " coord:" << coord << " stencil:" << stencil << " kernel:" << kernel << " extract:" << extract << " total:" << total << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void RectStapleOrig(GaugeMat &Stap, const GaugeLorentz &Umu,
|
|
||||||
int mu) {
|
|
||||||
GridBase *grid = Umu.Grid();
|
|
||||||
|
|
||||||
std::vector<GaugeMat> U(Nd, grid);
|
|
||||||
for (int d = 0; d < Nd; d++) {
|
|
||||||
U[d] = PeekIndex<LorentzIndex>(Umu, d);
|
|
||||||
}
|
|
||||||
|
|
||||||
Stap = Zero();
|
|
||||||
|
|
||||||
for (int nu = 0; nu < Nd; nu++) {
|
|
||||||
if (nu != mu) {
|
|
||||||
// __ ___
|
|
||||||
// | __ |
|
|
||||||
//
|
|
||||||
//tmp1 = U_nu^dag(x-nu)
|
|
||||||
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu) U_nu^dag(x-nu-mu)
|
|
||||||
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu) U_mu^dag(x-2mu) U_nu^dag(x-nu-2mu)
|
|
||||||
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu) U_mu^dag(x-2mu+nu) U_nu^dag(x-2mu)
|
|
||||||
//tmp5 = U_mu(x)tmp4(x+mu) = U_mu(x)U_nu(x+mu)U_mu^dag(x+nu) U_mu^dag(x-mu+nu) U_nu^dag(x-mu)
|
|
||||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
|
||||||
|
|
||||||
Stap += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftForward(
|
|
||||||
U[mu], mu,
|
|
||||||
Gimpl::CovShiftForward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu,
|
|
||||||
Gimpl::CovShiftIdentityBackward(U[nu], nu))))),
|
|
||||||
mu);
|
|
||||||
|
|
||||||
// __
|
|
||||||
// |__ __ |
|
|
||||||
|
|
||||||
//tmp1 = U^dag_mu(x-mu)U_nu(x-mu)
|
|
||||||
//tmp2 = U^dag_mu(x-mu)tmp1(x-mu) = U^dag_mu(x-mu)U^dag_mu(x-2mu)U_nu(x-2mu)
|
|
||||||
//tmp3 = U^dag_nu(x-nu)tmp2(x-nu) = U^dag_nu(x-nu)U^dag_mu(x-mu-nu)U^dag_mu(x-2mu-nu)U_nu(x-2mu-nu)
|
|
||||||
//tmp4 = U_mu(x)tmp3(x+mu) = U_mu(x)U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)
|
|
||||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
|
||||||
|
|
||||||
Stap += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftForward(
|
|
||||||
U[mu], mu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu, Gimpl::CovShiftBackward(U[mu], mu, U[nu])))),
|
|
||||||
mu);
|
|
||||||
|
|
||||||
// __
|
|
||||||
// |__ __ |
|
|
||||||
//Forward: Out(x) = Link(x)*field(x+mu)
|
|
||||||
//Backward: Out(x) = Link^dag(x-mu)*field(x-mu)
|
|
||||||
//ShiftStaple: Link(x) = Link(x+mu)
|
|
||||||
|
|
||||||
//tmp1 = U_nu(x)U_mu(x+nu)
|
|
||||||
//tmp2 = U^dag_mu(x-mu)tmp1(x-mu) = U^dag_mu(x-mu)U_nu(x-mu)U_mu(x+nu-mu)
|
|
||||||
//tmp3 = U^dag_mu(x-mu)tmp2(x-mu) = U^dag_mu(x-mu)U^dag_mu(x-2mu)U_nu(x-2mu)U_mu(x+nu-2mu)
|
|
||||||
//tmp4 = U^dag_nu(x-nu)tmp3(x-nu) = U^dag_nu(x-nu)U^dag_mu(x-mu-nu)U^dag_mu(x-2mu-nu)U_nu(x-2mu-nu)U_mu(x-2mu)
|
|
||||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
|
||||||
Stap += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[mu])))),
|
|
||||||
mu);
|
|
||||||
|
|
||||||
// __ ___
|
|
||||||
// |__ |
|
|
||||||
//tmp1 = U_nu^dag(x-nu)U_mu(x-nu)
|
|
||||||
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu)U_nu^dag(x-mu-nu)U_mu(x-mu-nu)
|
|
||||||
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu)U_mu^dag(x-2mu)U_nu^dag(x-2mu-nu)U_mu(x-2mu-nu)
|
|
||||||
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu)U_mu^dag(x-2mu+nu)U_nu^dag(x-2mu)U_mu(x-2mu)
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
|
||||||
Stap += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftForward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu, Gimpl::CovShiftBackward(U[nu], nu, U[mu])))),
|
|
||||||
mu);
|
|
||||||
|
|
||||||
// --
|
|
||||||
// | |
|
|
||||||
//
|
|
||||||
// | |
|
|
||||||
//tmp1 = U_nu^dag(x-nu)
|
|
||||||
//tmp2 = U_nu^dag(x-nu)tmp1(x-nu) = U_nu^dag(x-nu)U_nu^dag(x-2nu)
|
|
||||||
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu)U_nu^dag(x-mu-nu)U_nu^dag(x-mu-2nu)
|
|
||||||
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_nu^dag(x-mu-nu)
|
|
||||||
//tmp5 = U_nu(x)tmp4(x+nu) = U_nu(x)U_nu(x+nu)U_mu^dag(x-mu+2nu)U_nu^dag(x-mu+nu)U_nu^dag(x-mu)
|
|
||||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
|
||||||
Stap += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftForward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftForward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftIdentityBackward(U[nu], nu))))),
|
|
||||||
mu);
|
|
||||||
|
|
||||||
// | |
|
|
||||||
//
|
|
||||||
// | |
|
|
||||||
// --
|
|
||||||
//tmp1 = U_nu(x)U_nu(x+nu)
|
|
||||||
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu)U_nu(x-mu)U_nu(x-mu+nu)
|
|
||||||
//tmp3 = U_nu^dag(x-nu)tmp2(x-nu) = U_nu^dag(x-nu)U_mu^dag(x-mu-nu)U_nu(x-mu-nu)U_nu(x-mu)
|
|
||||||
//tmp4 = U_nu^dag(x-nu)tmp3(x-nu) = U_nu^dag(x-nu)U_nu^dag(x-2nu)U_mu^dag(x-mu-2nu)U_nu(x-mu-2nu)U_nu(x-mu-nu)
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
|
||||||
Stap += Gimpl::ShiftStaple(
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[nu], nu,
|
|
||||||
Gimpl::CovShiftBackward(
|
|
||||||
U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[nu])))),
|
|
||||||
mu);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static void RectStaplePadded(GaugeMat &Stap, const GaugeLorentz &U,
|
|
||||||
int mu) {
|
|
||||||
PaddedCell Ghost(2,(GridCartesian*)U.Grid());
|
|
||||||
GridBase *ggrid = Ghost.grids.back();
|
|
||||||
|
|
||||||
CshiftImplGauge<Gimpl> cshift_impl;
|
|
||||||
std::vector<GaugeMat> Ug_dirs(Nd,ggrid);
|
|
||||||
for(int i=0;i<Nd;i++) Ug_dirs[i] = Ghost.Exchange(PeekIndex<LorentzIndex>(U, i), cshift_impl);
|
|
||||||
|
|
||||||
GaugeMat gStaple(ggrid);
|
|
||||||
|
|
||||||
std::vector<Coordinate> shifts;
|
|
||||||
for (int nu = 0; nu < Nd; nu++) {
|
|
||||||
if (nu != mu) {
|
|
||||||
auto genShift = [&](int mushift,int nushift){
|
|
||||||
Coordinate out(Nd,0); out[mu]=mushift; out[nu]=nushift; return out;
|
|
||||||
};
|
|
||||||
|
|
||||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
|
||||||
shifts.push_back(genShift(0,0));
|
|
||||||
shifts.push_back(genShift(0,+1));
|
|
||||||
shifts.push_back(genShift(+1,+1));
|
|
||||||
shifts.push_back(genShift(+2,0));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(+1,-1));
|
|
||||||
shifts.push_back(genShift(+2,-1));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
|
||||||
shifts.push_back(genShift(-1,0));
|
|
||||||
shifts.push_back(genShift(-1,-1));
|
|
||||||
shifts.push_back(genShift(-1,-1));
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(+1,-1));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
|
||||||
shifts.push_back(genShift(-1,0));
|
|
||||||
shifts.push_back(genShift(-1,0));
|
|
||||||
shifts.push_back(genShift(-1,+1));
|
|
||||||
shifts.push_back(genShift(0,+1));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
|
||||||
shifts.push_back(genShift(0,0));
|
|
||||||
shifts.push_back(genShift(0,+1));
|
|
||||||
shifts.push_back(genShift(0,+2));
|
|
||||||
shifts.push_back(genShift(+1,+1));
|
|
||||||
shifts.push_back(genShift(+1,0));
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
|
||||||
shifts.push_back(genShift(0,-1));
|
|
||||||
shifts.push_back(genShift(0,-2));
|
|
||||||
shifts.push_back(genShift(0,-2));
|
|
||||||
shifts.push_back(genShift(+1,-2));
|
|
||||||
shifts.push_back(genShift(+1,-1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
size_t nshift = shifts.size();
|
|
||||||
|
|
||||||
GeneralLocalStencil gStencil(ggrid,shifts);
|
|
||||||
{
|
|
||||||
autoView( gStaple_v , gStaple, AcceleratorWrite);
|
|
||||||
auto gStencil_v = gStencil.View();
|
|
||||||
|
|
||||||
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
|
|
||||||
size_t vsize = Nd*sizeof(GaugeViewType);
|
|
||||||
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
|
|
||||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = Ug_dirs[i].View(AcceleratorRead);
|
|
||||||
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
|
|
||||||
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
|
|
||||||
|
|
||||||
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
|
|
||||||
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
|
|
||||||
stencil_ss = Zero();
|
|
||||||
int s=0;
|
|
||||||
for(int nu=0;nu<Nd;nu++){
|
|
||||||
if(nu != mu){
|
|
||||||
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
|
|
||||||
GeneralStencilEntry const* e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
auto U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
e = gStencil_v.GetEntry(s++,ss);
|
|
||||||
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
|
|
||||||
|
|
||||||
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert(s==nshift);
|
|
||||||
coalescedWrite(gStaple_v[ss],stencil_ss);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
|
|
||||||
free(Ug_dirs_v_host);
|
|
||||||
acceleratorFreeDevice(Ug_dirs_v);
|
|
||||||
}
|
|
||||||
Stap = Ghost.Extract(gStaple);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
|
||||||
{
|
|
||||||
Grid_init(&argc,&argv);
|
|
||||||
|
|
||||||
Coordinate latt_size = GridDefaultLatt();
|
|
||||||
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
|
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
|
||||||
std::cout << " mpi "<<mpi_layout<<std::endl;
|
|
||||||
std::cout << " simd "<<simd_layout<<std::endl;
|
|
||||||
std::cout << " latt "<<latt_size<<std::endl;
|
|
||||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
|
||||||
|
|
||||||
GridParallelRNG pRNG(&GRID);
|
|
||||||
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
|
||||||
LatticeGaugeField U(&GRID);
|
|
||||||
|
|
||||||
SU<Nc>::HotConfiguration(pRNG,U);
|
|
||||||
|
|
||||||
//typedef PeriodicGimplD Gimpl;
|
|
||||||
typedef ConjugateGimplD Gimpl;
|
|
||||||
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
|
|
||||||
Gimpl::setDirections(conj_dirs);
|
|
||||||
|
|
||||||
typedef typename WilsonLoopsTest<Gimpl>::GaugeMat GaugeMat;
|
|
||||||
typedef typename WilsonLoopsTest<Gimpl>::GaugeLorentz GaugeLorentz;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Checking Staple" << std::endl;
|
|
||||||
int count = 0;
|
|
||||||
double torig=0, tpadded=0;
|
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
for(int nu=0;nu<Nd;nu++){
|
|
||||||
if(mu != nu){
|
|
||||||
GaugeMat staple_orig(&GRID), staple_padded(&GRID);
|
|
||||||
double t0 = usecond();
|
|
||||||
WilsonLoopsTest<Gimpl>::StapleOrig(staple_orig,U,mu,nu);
|
|
||||||
double t1 = usecond();
|
|
||||||
WilsonLoopsTest<Gimpl>::StaplePadded(staple_padded,U,mu,nu);
|
|
||||||
double t2 = usecond();
|
|
||||||
torig += t1-t0; tpadded += t2-t1;
|
|
||||||
++count;
|
|
||||||
|
|
||||||
GaugeMat diff = staple_orig - staple_padded;
|
|
||||||
double n = norm2(diff);
|
|
||||||
std::cout << GridLogMessage << mu << " " << nu << " " << n << std::endl;
|
|
||||||
assert(n<1e-10);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << GridLogMessage << "Staple timings orig: " << torig/1000/count << "ms, padded: " << tpadded/1000/count << "ms" << std::endl;
|
|
||||||
count=0; torig=tpadded=0;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Checking RectStaple" << std::endl;
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
GaugeMat staple_orig(&GRID), staple_padded(&GRID);
|
|
||||||
double t0 = usecond();
|
|
||||||
WilsonLoopsTest<Gimpl>::RectStapleOrig(staple_orig,U,mu);
|
|
||||||
double t1 = usecond();
|
|
||||||
WilsonLoopsTest<Gimpl>::RectStaplePadded(staple_padded,U,mu);
|
|
||||||
double t2 = usecond();
|
|
||||||
torig += t1-t0; tpadded += t2-t1;
|
|
||||||
++count;
|
|
||||||
|
|
||||||
GaugeMat diff = staple_orig - staple_padded;
|
|
||||||
double n = norm2(diff);
|
|
||||||
std::cout << GridLogMessage << mu << " " << n << std::endl;
|
|
||||||
assert(n<1e-10);
|
|
||||||
}
|
|
||||||
std::cout << GridLogMessage << "RectStaple timings orig: " << torig/1000/count << "ms, padded: " << tpadded/1000/count << "ms" << std::endl;
|
|
||||||
|
|
||||||
Grid_finalize();
|
|
||||||
}
|
|
Reference in New Issue
Block a user