mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 15:55:37 +00:00
1088 lines
36 KiB
C++
1088 lines
36 KiB
C++
/*************************************************************************************
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
Source file: ./lib/qcd/action/fermion/WilsonFermion5D.cc
|
|
|
|
Copyright (C) 2015
|
|
|
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
|
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
|
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
*************************************************************************************/
|
|
/* END LEGAL */
|
|
#include <Grid/qcd/action/fermion/FermionCore.h>
|
|
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
|
|
#include <Grid/perfmon/PerfCount.h>
|
|
|
|
namespace Grid {
|
|
namespace QCD {
|
|
|
|
// S-direction is INNERMOST and takes no part in the parity.
|
|
const std::vector<int> WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4});
|
|
const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
|
|
|
|
// 5d lattice for DWF.
|
|
template<class Impl>
|
|
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|
GridCartesian &FiveDimGrid,
|
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
|
GridCartesian &FourDimGrid,
|
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
|
RealD _M5,const ImplParams &p) :
|
|
Kernels(p),
|
|
_FiveDimGrid (&FiveDimGrid),
|
|
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
|
_FourDimGrid (&FourDimGrid),
|
|
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
|
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
|
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
|
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
|
|
M5(_M5),
|
|
Umu(_FourDimGrid),
|
|
UmuEven(_FourDimRedBlackGrid),
|
|
UmuOdd (_FourDimRedBlackGrid),
|
|
Lebesgue(_FourDimGrid),
|
|
LebesgueEvenOdd(_FourDimRedBlackGrid),
|
|
_tmp(&FiveDimRedBlackGrid)
|
|
{
|
|
// some assertions
|
|
assert(FiveDimGrid._ndimension==5);
|
|
assert(FourDimGrid._ndimension==4);
|
|
assert(FourDimRedBlackGrid._ndimension==4);
|
|
assert(FiveDimRedBlackGrid._ndimension==5);
|
|
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
|
|
|
// extent of fifth dim and not spread out
|
|
Ls=FiveDimGrid._fdimensions[0];
|
|
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
|
assert(FiveDimGrid._processors[0] ==1);
|
|
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
|
|
|
// Other dimensions must match the decomposition of the four-D fields
|
|
for(int d=0;d<4;d++){
|
|
|
|
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
|
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
|
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
|
|
|
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
|
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
|
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
|
|
|
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
|
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
|
}
|
|
|
|
if (Impl::LsVectorised) {
|
|
|
|
int nsimd = Simd::Nsimd();
|
|
|
|
// Dimension zero of the five-d is the Ls direction
|
|
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
|
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
|
|
|
for(int d=0;d<4;d++){
|
|
assert(FourDimGrid._simd_layout[d]=1);
|
|
assert(FourDimRedBlackGrid._simd_layout[d]=1);
|
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
|
}
|
|
|
|
} else {
|
|
|
|
// Dimension zero of the five-d is the Ls direction
|
|
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
|
assert(FiveDimGrid._simd_layout[0] ==1);
|
|
|
|
}
|
|
|
|
// Allocate the required comms buffer
|
|
ImportGauge(_Umu);
|
|
// Build lists of exterior only nodes
|
|
int LLs = FiveDimGrid._rdimensions[0];
|
|
int vol4;
|
|
vol4=FourDimGrid.oSites();
|
|
Stencil.BuildSurfaceList(LLs,vol4);
|
|
|
|
vol4=FourDimRedBlackGrid.oSites();
|
|
StencilEven.BuildSurfaceList(LLs,vol4);
|
|
StencilOdd.BuildSurfaceList(LLs,vol4);
|
|
|
|
// std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size()
|
|
// <<" " << StencilEven.surface_list.size()<<std::endl;
|
|
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::Report(void)
|
|
{
|
|
RealD NP = _FourDimGrid->_Nprocessors;
|
|
RealD NN = _FourDimGrid->NodeCount();
|
|
RealD volume = Ls;
|
|
std::vector<int> latt = _FourDimGrid->GlobalDimensions();
|
|
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
|
|
|
if ( DhopCalls > 0 ) {
|
|
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D Number of DhopEO Calls : " << DhopCalls << std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl;
|
|
|
|
// Average the compute time
|
|
_FourDimGrid->GlobalSum(DhopComputeTime);
|
|
DhopComputeTime/=NP;
|
|
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
|
|
|
RealD Fullmflops = 1344*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
|
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
|
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
|
|
|
}
|
|
|
|
if ( DerivCalls > 0 ) {
|
|
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
|
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
|
|
|
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
|
|
|
RealD Fullmflops = 144*volume*DerivCalls/(DerivDhopComputeTime+DerivCommTime)/2; // 2 for red black counting
|
|
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NP << std::endl; }
|
|
|
|
if (DerivCalls > 0 || DhopCalls > 0){
|
|
std::cout << GridLogMessage << "WilsonFermion5D Stencil" <<std::endl; Stencil.Report();
|
|
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
|
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd" <<std::endl; StencilOdd.Report();
|
|
}
|
|
if ( DhopCalls > 0){
|
|
std::cout << GridLogMessage << "WilsonFermion5D Stencil Reporti()" <<std::endl; Stencil.Reporti(DhopCalls);
|
|
std::cout << GridLogMessage << "WilsonFermion5D StencilEven Reporti()"<<std::endl; StencilEven.Reporti(DhopCalls);
|
|
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd Reporti()" <<std::endl; StencilOdd.Reporti(DhopCalls);
|
|
}
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::ZeroCounters(void) {
|
|
DhopCalls = 0;
|
|
DhopCommTime = 0;
|
|
DhopComputeTime = 0;
|
|
DhopComputeTime2= 0;
|
|
DhopFaceTime = 0;
|
|
DhopTotalTime = 0;
|
|
|
|
DerivCalls = 0;
|
|
DerivCommTime = 0;
|
|
DerivComputeTime = 0;
|
|
DerivDhopComputeTime = 0;
|
|
|
|
Stencil.ZeroCounters();
|
|
StencilEven.ZeroCounters();
|
|
StencilOdd.ZeroCounters();
|
|
Stencil.ZeroCountersi();
|
|
StencilEven.ZeroCountersi();
|
|
StencilOdd.ZeroCountersi();
|
|
}
|
|
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
|
{
|
|
GaugeField HUmu(_Umu._grid);
|
|
HUmu = _Umu*(-0.5);
|
|
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
|
pickCheckerboard(Even,UmuEven,Umu);
|
|
pickCheckerboard(Odd ,UmuOdd,Umu);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
|
|
{
|
|
int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
|
|
// we drop off the innermost fifth dimension
|
|
// assert( (disp==1)||(disp==-1) );
|
|
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
|
|
|
|
Compressor compressor(DaggerNo);
|
|
Stencil.HaloExchange(in,compressor);
|
|
|
|
int skip = (disp==1) ? 0 : 1;
|
|
|
|
int dirdisp = dir+skip*4;
|
|
int gamma = dir+(1-skip)*4;
|
|
|
|
assert(dirdisp<=7);
|
|
assert(dirdisp>=0);
|
|
|
|
parallel_for(int ss=0;ss<Umu._grid->oSites();ss++){
|
|
for(int s=0;s<Ls;s++){
|
|
int sU=ss;
|
|
int sF = s+Ls*sU;
|
|
Kernels::DhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
|
|
}
|
|
}
|
|
};
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|
DoubledGaugeField & U,
|
|
GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
DerivCalls++;
|
|
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
|
|
conformable(st._grid,A._grid);
|
|
conformable(st._grid,B._grid);
|
|
|
|
Compressor compressor(dag);
|
|
|
|
FermionField Btilde(B._grid);
|
|
FermionField Atilde(B._grid);
|
|
|
|
DerivCommTime-=usecond();
|
|
st.HaloExchange(B,compressor);
|
|
DerivCommTime+=usecond();
|
|
|
|
Atilde=A;
|
|
int LLs = B._grid->_rdimensions[0];
|
|
|
|
|
|
DerivComputeTime-=usecond();
|
|
for (int mu = 0; mu < Nd; mu++) {
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Flip gamma if dag
|
|
////////////////////////////////////////////////////////////////////////
|
|
int gamma = mu;
|
|
if (!dag) gamma += Nd;
|
|
|
|
////////////////////////
|
|
// Call the single hop
|
|
////////////////////////
|
|
|
|
DerivDhopComputeTime -= usecond();
|
|
parallel_for (int sss = 0; sss < U._grid->oSites(); sss++) {
|
|
for (int s = 0; s < Ls; s++) {
|
|
int sU = sss;
|
|
int sF = s + Ls * sU;
|
|
|
|
assert(sF < B._grid->oSites());
|
|
assert(sU < U._grid->oSites());
|
|
|
|
Kernels::DhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
|
|
|
|
////////////////////////////
|
|
// spin trace outer product
|
|
////////////////////////////
|
|
}
|
|
}
|
|
////////////////////////////
|
|
// spin trace outer product
|
|
////////////////////////////
|
|
DerivDhopComputeTime += usecond();
|
|
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
|
}
|
|
DerivComputeTime += usecond();
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
conformable(A._grid,FermionGrid());
|
|
conformable(A._grid,B._grid);
|
|
|
|
//conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment
|
|
|
|
mat.checkerboard = A.checkerboard;
|
|
|
|
DerivInternal(Stencil,Umu,mat,A,B,dag);
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
conformable(A._grid,FermionRedBlackGrid());
|
|
conformable(A._grid,B._grid);
|
|
|
|
assert(B.checkerboard==Odd);
|
|
assert(A.checkerboard==Even);
|
|
mat.checkerboard = Even;
|
|
|
|
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
|
|
}
|
|
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
conformable(A._grid,FermionRedBlackGrid());
|
|
conformable(A._grid,B._grid);
|
|
|
|
assert(B.checkerboard==Even);
|
|
assert(A.checkerboard==Odd);
|
|
mat.checkerboard = Odd;
|
|
|
|
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|
DoubledGaugeField & U,
|
|
const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
DhopTotalTime-=usecond();
|
|
#ifdef GRID_OMP
|
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
|
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
|
else
|
|
#endif
|
|
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
|
DhopTotalTime+=usecond();
|
|
}
|
|
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo,
|
|
DoubledGaugeField & U,
|
|
const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
#ifdef GRID_OMP
|
|
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
|
|
Compressor compressor(dag);
|
|
|
|
int LLs = in._grid->_rdimensions[0];
|
|
int len = U._grid->oSites();
|
|
|
|
DhopFaceTime-=usecond();
|
|
st.HaloExchangeOptGather(in,compressor);
|
|
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
|
DhopFaceTime+=usecond();
|
|
|
|
double ctime=0;
|
|
double ptime=0;
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Ugly explicit thread mapping introduced for OPA reasons.
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
|
|
{
|
|
int tid = omp_get_thread_num();
|
|
int nthreads = omp_get_num_threads();
|
|
int ncomms = CartesianCommunicator::nCommThreads;
|
|
if (ncomms == -1) ncomms = 1;
|
|
assert(nthreads > ncomms);
|
|
if (tid >= ncomms) {
|
|
double start = usecond();
|
|
nthreads -= ncomms;
|
|
int ttid = tid - ncomms;
|
|
int n = U._grid->oSites();
|
|
int chunk = n / nthreads;
|
|
int rem = n % nthreads;
|
|
int myblock, myn;
|
|
if (ttid < rem) {
|
|
myblock = ttid * chunk + ttid;
|
|
myn = chunk+1;
|
|
} else {
|
|
myblock = ttid*chunk + rem;
|
|
myn = chunk;
|
|
}
|
|
|
|
// do the compute
|
|
if (dag == DaggerYes) {
|
|
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
|
int sU = ss;
|
|
int sF = LLs * sU;
|
|
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
|
|
}
|
|
} else {
|
|
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
|
int sU = ss;
|
|
int sF = LLs * sU;
|
|
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
|
|
}
|
|
}
|
|
ptime = usecond() - start;
|
|
} else {
|
|
double start = usecond();
|
|
st.CommunicateThreaded();
|
|
ctime = usecond() - start;
|
|
}
|
|
}
|
|
DhopCommTime += ctime;
|
|
DhopComputeTime+=ptime;
|
|
|
|
// First to enter, last to leave timing
|
|
st.CollateThreads();
|
|
|
|
DhopFaceTime-=usecond();
|
|
st.CommsMerge(compressor);
|
|
DhopFaceTime+=usecond();
|
|
|
|
DhopComputeTime2-=usecond();
|
|
if (dag == DaggerYes) {
|
|
int sz=st.surface_list.size();
|
|
parallel_for (int ss = 0; ss < sz; ss++) {
|
|
int sU = st.surface_list[ss];
|
|
int sF = LLs * sU;
|
|
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
|
}
|
|
} else {
|
|
int sz=st.surface_list.size();
|
|
parallel_for (int ss = 0; ss < sz; ss++) {
|
|
int sU = st.surface_list[ss];
|
|
int sF = LLs * sU;
|
|
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
|
}
|
|
}
|
|
DhopComputeTime2+=usecond();
|
|
#else
|
|
assert(0);
|
|
#endif
|
|
}
|
|
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
|
|
DoubledGaugeField & U,
|
|
const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
Compressor compressor(dag);
|
|
|
|
int LLs = in._grid->_rdimensions[0];
|
|
|
|
DhopCommTime-=usecond();
|
|
st.HaloExchangeOpt(in,compressor);
|
|
DhopCommTime+=usecond();
|
|
|
|
DhopComputeTime-=usecond();
|
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
|
|
|
if (dag == DaggerYes) {
|
|
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
|
int sU = ss;
|
|
int sF = LLs * sU;
|
|
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
|
}
|
|
} else {
|
|
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
|
int sU = ss;
|
|
int sF = LLs * sU;
|
|
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
|
}
|
|
}
|
|
DhopComputeTime+=usecond();
|
|
}
|
|
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
DhopCalls++;
|
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
|
conformable(in._grid,out._grid); // drops the cb check
|
|
|
|
assert(in.checkerboard==Even);
|
|
out.checkerboard = Odd;
|
|
|
|
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
DhopCalls++;
|
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
|
conformable(in._grid,out._grid); // drops the cb check
|
|
|
|
assert(in.checkerboard==Odd);
|
|
out.checkerboard = Even;
|
|
|
|
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
DhopCalls+=2;
|
|
conformable(in._grid,FermionGrid()); // verifies full grid
|
|
conformable(in._grid,out._grid);
|
|
|
|
out.checkerboard = in.checkerboard;
|
|
|
|
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
out.checkerboard=in.checkerboard;
|
|
Dhop(in,out,dag); // -0.5 is included
|
|
axpy(out,4.0-M5,in,out);
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in, RealD mass,std::vector<double> twist)
|
|
{
|
|
// what type LatticeComplex
|
|
GridBase *_grid = _FourDimGrid;
|
|
GridBase *_5dgrid = _FiveDimGrid;
|
|
|
|
conformable(_5dgrid,out._grid);
|
|
|
|
FermionField PRsource(_5dgrid);
|
|
FermionField PLsource(_5dgrid);
|
|
FermionField buf1_4d(_grid);
|
|
FermionField buf2_4d(_grid);
|
|
FermionField GR(_5dgrid);
|
|
FermionField GL(_5dgrid);
|
|
FermionField bufL_4d(_grid);
|
|
FermionField bufR_4d(_grid);
|
|
|
|
unsigned int Ls = in._grid->_rdimensions[0];
|
|
|
|
typedef typename FermionField::vector_type vector_type;
|
|
typedef typename FermionField::scalar_type ScalComplex;
|
|
typedef iSinglet<ScalComplex> Tcomplex;
|
|
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
|
|
|
Gamma::Algebra Gmu [] = {
|
|
Gamma::Algebra::GammaX,
|
|
Gamma::Algebra::GammaY,
|
|
Gamma::Algebra::GammaZ,
|
|
Gamma::Algebra::GammaT
|
|
};
|
|
|
|
Gamma g5(Gamma::Algebra::Gamma5);
|
|
|
|
std::vector<int> latt_size = _grid->_fdimensions;
|
|
|
|
LatComplex sk(_grid); sk = zero;
|
|
LatComplex sk2(_grid); sk2= zero;
|
|
LatComplex W(_grid); W= zero;
|
|
LatComplex a(_grid); a= zero;
|
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
|
LatComplex cosha(_grid);
|
|
LatComplex kmu(_grid);
|
|
LatComplex Wea(_grid);
|
|
LatComplex Wema(_grid);
|
|
LatComplex sinha(_grid);
|
|
LatComplex sinhaLs(_grid);
|
|
LatComplex coshaLs(_grid);
|
|
LatComplex A(_grid);
|
|
LatComplex F(_grid);
|
|
LatComplex App(_grid);
|
|
LatComplex Amm(_grid);
|
|
LatComplex Bpp(_grid);
|
|
LatComplex Bmm(_grid);
|
|
LatComplex ABpm(_grid); //Apm=Amp=Bpm=Bmp
|
|
LatComplex signW(_grid);
|
|
|
|
ScalComplex ci(0.0,1.0);
|
|
|
|
for(int mu=0;mu<Nd;mu++) {
|
|
|
|
LatticeCoordinate(kmu,mu);
|
|
|
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
|
|
|
kmu = TwoPiL * kmu;
|
|
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
|
|
|
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
|
sk = sk + sin(kmu) *sin(kmu);
|
|
}
|
|
|
|
W = one - M5 + sk2;
|
|
|
|
////////////////////////////////////////////
|
|
// Cosh alpha -> alpha
|
|
////////////////////////////////////////////
|
|
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
|
|
|
// FIXME Need a Lattice acosh
|
|
for(int idx=0;idx<_grid->lSites();idx++){
|
|
std::vector<int> lcoor(Nd);
|
|
Tcomplex cc;
|
|
RealD sgn;
|
|
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
|
peekLocalSite(cc,cosha,lcoor);
|
|
assert((double)real(cc)>=1.0);
|
|
assert(fabs((double)imag(cc))<=1.0e-15);
|
|
cc = ScalComplex(::acosh(real(cc)),0.0);
|
|
pokeLocalSite(cc,a,lcoor);
|
|
}
|
|
|
|
Wea = ( exp( a) * abs(W) );
|
|
Wema= ( exp(-a) * abs(W) );
|
|
sinha = 0.5*(exp( a) - exp(-a));
|
|
sinhaLs = 0.5*(exp( a*Ls) - exp(-a*Ls));
|
|
coshaLs = 0.5*(exp( a*Ls) + exp(-a*Ls));
|
|
|
|
A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
|
|
F = exp( a*Ls) * (one - Wea + (Wema - one) * mass*mass);
|
|
F = F + exp(-a*Ls) * (Wema - one + (one - Wea) * mass*mass);
|
|
F = F - abs(W) * sinha * 4.0 * mass;
|
|
|
|
Bpp = (A/F) * (exp(-a*Ls*2.0) - one) * (one - Wema) * (one - mass*mass * one);
|
|
Bmm = (A/F) * (one - exp(a*Ls*2.0)) * (one - Wea) * (one - mass*mass * one);
|
|
App = (A/F) * (exp(-a*Ls*2.0) - one) * exp(-a) * (exp(-a) - abs(W)) * (one - mass*mass * one);
|
|
Amm = (A/F) * (one - exp(a*Ls*2.0)) * exp(a) * (exp(a) - abs(W)) * (one - mass*mass * one);
|
|
ABpm = (A/F) * abs(W) * sinha * 2.0 * (one + mass * coshaLs * 2.0 + mass*mass * one);
|
|
|
|
//P+ source, P- source
|
|
PRsource = (in + g5 * in) * 0.5;
|
|
PLsource = (in - g5 * in) * 0.5;
|
|
|
|
//calculate GR, GL
|
|
for(unsigned int ss=1;ss<=Ls;ss++)
|
|
{
|
|
bufR_4d = zero;
|
|
bufL_4d = zero;
|
|
for(unsigned int tt=1;tt<=Ls;tt++)
|
|
{
|
|
//possible sign if W<0
|
|
if((ss+tt)%2==1) signW = abs(W)/W;
|
|
else signW = one;
|
|
|
|
unsigned int f = (ss > tt) ? ss-tt : tt-ss; //f = abs(ss-tt)
|
|
//GR
|
|
buf1_4d = zero;
|
|
ExtractSlice(buf1_4d, PRsource, (tt-1), 0);
|
|
//G(s,t)
|
|
bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d;
|
|
//A++*exp(a(s+t))
|
|
bufR_4d = bufR_4d + App * exp(a*ss) * exp(a*tt) * signW * buf1_4d ;
|
|
//A+-*exp(a(s-t))
|
|
bufR_4d = bufR_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
|
//A-+*exp(a(-s+t))
|
|
bufR_4d = bufR_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf1_4d ;
|
|
//A--*exp(a(-s-t))
|
|
bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
|
|
|
//GL
|
|
buf2_4d = zero;
|
|
ExtractSlice(buf2_4d, PLsource, (tt-1), 0);
|
|
//G(s,t)
|
|
bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d;
|
|
//B++*exp(a(s+t))
|
|
bufL_4d = bufL_4d + Bpp * exp(a*ss) * exp(a*tt) * signW * buf2_4d ;
|
|
//B+-*exp(a(s-t))
|
|
bufL_4d = bufL_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
|
//B-+*exp(a(-s+t))
|
|
bufL_4d = bufL_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf2_4d ;
|
|
//B--*exp(a(-s-t))
|
|
bufL_4d = bufL_4d + Bmm * exp(-a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
|
}
|
|
InsertSlice(bufR_4d, GR, (ss-1), 0);
|
|
InsertSlice(bufL_4d, GL, (ss-1), 0);
|
|
}
|
|
|
|
//calculate propagator
|
|
for(unsigned int ss=1;ss<=Ls;ss++)
|
|
{
|
|
bufR_4d = zero;
|
|
bufL_4d = zero;
|
|
|
|
//(i*gamma_mu*sin(p_mu) - W)*(GL*P- source)
|
|
buf1_4d = zero;
|
|
ExtractSlice(buf1_4d, GL, (ss-1), 0);
|
|
buf2_4d = zero;
|
|
for(int mu=0;mu<Nd;mu++) {
|
|
LatticeCoordinate(kmu,mu);
|
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
|
kmu = TwoPiL * kmu + TwoPiL * one * twist[mu];//twisted boundary
|
|
buf2_4d = buf2_4d + sin(kmu)*ci*(Gamma(Gmu[mu])*buf1_4d);
|
|
}
|
|
bufL_4d = buf2_4d - W * buf1_4d;
|
|
|
|
//(i*gamma_mu*sin(p_mu) - W)*(GR*P+ source)
|
|
buf1_4d = zero;
|
|
ExtractSlice(buf1_4d, GR, (ss-1), 0);
|
|
buf2_4d = zero;
|
|
for(int mu=0;mu<Nd;mu++) {
|
|
LatticeCoordinate(kmu,mu);
|
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
|
kmu = TwoPiL * kmu + TwoPiL * one * twist[mu];//twisted boundary
|
|
buf2_4d = buf2_4d + sin(kmu)*ci*(Gamma(Gmu[mu])*buf1_4d);
|
|
}
|
|
bufR_4d = buf2_4d - W * buf1_4d;
|
|
|
|
//(delta(s-1,u) - m*delta(s,1)*delta(u,Ls))*GL
|
|
if(ss==1){
|
|
ExtractSlice(buf1_4d, GL, (Ls-1), 0);
|
|
bufL_4d = bufL_4d - mass*buf1_4d;
|
|
}
|
|
else {
|
|
ExtractSlice(buf1_4d, GL, (ss-2), 0);
|
|
bufL_4d = bufL_4d + buf1_4d;
|
|
}
|
|
|
|
//(delta(s+1,u) - m*delta(s,Ls)*delta(u,1))*GR
|
|
if(ss==Ls){
|
|
ExtractSlice(buf1_4d, GR, 0, 0);
|
|
bufR_4d = bufR_4d - mass*buf1_4d;
|
|
}
|
|
else {
|
|
ExtractSlice(buf1_4d, GR, ss, 0);
|
|
bufR_4d = bufR_4d + buf1_4d;
|
|
}
|
|
buf1_4d = bufL_4d + bufR_4d;
|
|
InsertSlice(buf1_4d, out, (ss-1), 0);
|
|
}
|
|
|
|
|
|
out = out * (-1.0);
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const FermionField &in, RealD mass,std::vector<double> twist)
|
|
{
|
|
// what type LatticeComplex
|
|
GridBase *_grid = _FourDimGrid;
|
|
conformable(_grid,out._grid);
|
|
|
|
typedef typename FermionField::vector_type vector_type;
|
|
typedef typename FermionField::scalar_type ScalComplex;
|
|
typedef iSinglet<ScalComplex> Tcomplex;
|
|
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
|
|
|
Gamma::Algebra Gmu [] = {
|
|
Gamma::Algebra::GammaX,
|
|
Gamma::Algebra::GammaY,
|
|
Gamma::Algebra::GammaZ,
|
|
Gamma::Algebra::GammaT
|
|
};
|
|
|
|
std::vector<int> latt_size = _grid->_fdimensions;
|
|
|
|
|
|
FermionField num (_grid); num = zero;
|
|
|
|
LatComplex sk(_grid); sk = zero;
|
|
LatComplex sk2(_grid); sk2= zero;
|
|
LatComplex W(_grid); W= zero;
|
|
LatComplex a(_grid); a= zero;
|
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
|
LatComplex denom(_grid); denom= zero;
|
|
LatComplex cosha(_grid);
|
|
LatComplex kmu(_grid);
|
|
LatComplex Wea(_grid);
|
|
LatComplex Wema(_grid);
|
|
|
|
ScalComplex ci(0.0,1.0);
|
|
|
|
for(int mu=0;mu<Nd;mu++) {
|
|
|
|
LatticeCoordinate(kmu,mu);
|
|
|
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
|
|
|
kmu = TwoPiL * kmu;
|
|
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
|
|
|
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
|
sk = sk + sin(kmu) *sin(kmu);
|
|
|
|
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);
|
|
|
|
}
|
|
|
|
W = one - M5 + sk2;
|
|
|
|
////////////////////////////////////////////
|
|
// Cosh alpha -> alpha
|
|
////////////////////////////////////////////
|
|
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
|
|
|
// FIXME Need a Lattice acosh
|
|
for(int idx=0;idx<_grid->lSites();idx++){
|
|
std::vector<int> lcoor(Nd);
|
|
Tcomplex cc;
|
|
RealD sgn;
|
|
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
|
peekLocalSite(cc,cosha,lcoor);
|
|
assert((double)real(cc)>=1.0);
|
|
assert(fabs((double)imag(cc))<=1.0e-15);
|
|
cc = ScalComplex(::acosh(real(cc)),0.0);
|
|
pokeLocalSite(cc,a,lcoor);
|
|
}
|
|
|
|
Wea = ( exp( a) * abs(W) );
|
|
Wema= ( exp(-a) * abs(W) );
|
|
|
|
num = num + ( one - Wema ) * mass * in;
|
|
denom= ( Wea - one ) + mass*mass * (one - Wema);
|
|
out = num/denom;
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist)
|
|
{
|
|
Gamma::Algebra Gmu [] = {
|
|
Gamma::Algebra::GammaX,
|
|
Gamma::Algebra::GammaY,
|
|
Gamma::Algebra::GammaZ,
|
|
Gamma::Algebra::GammaT
|
|
};
|
|
|
|
GridBase *_grid = _FourDimGrid;
|
|
conformable(_grid,out._grid);
|
|
|
|
typedef typename FermionField::vector_type vector_type;
|
|
typedef typename FermionField::scalar_type ScalComplex;
|
|
|
|
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
|
|
|
|
|
std::vector<int> latt_size = _grid->_fdimensions;
|
|
|
|
LatComplex sk(_grid); sk = zero;
|
|
LatComplex sk2(_grid); sk2= zero;
|
|
|
|
LatComplex w_k(_grid); w_k= zero;
|
|
LatComplex b_k(_grid); b_k= zero;
|
|
|
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
|
|
|
FermionField num (_grid); num = zero;
|
|
LatComplex denom(_grid); denom= zero;
|
|
LatComplex kmu(_grid);
|
|
ScalComplex ci(0.0,1.0);
|
|
|
|
for(int mu=0;mu<Nd;mu++) {
|
|
|
|
LatticeCoordinate(kmu,mu);
|
|
|
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
|
|
|
kmu = TwoPiL * kmu;
|
|
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
|
|
|
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
|
sk = sk + sin(kmu)*sin(kmu);
|
|
|
|
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);
|
|
|
|
}
|
|
num = num + mass * in ;
|
|
|
|
b_k = sk2 - M5;
|
|
|
|
w_k = sqrt(sk + b_k*b_k);
|
|
|
|
denom= ( w_k + b_k + mass*mass) ;
|
|
|
|
denom= one/denom;
|
|
out = num*denom;
|
|
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* Conserved current utilities for Wilson fermions, for contracting propagators
|
|
* to make a conserved current sink or inserting the conserved current
|
|
* sequentially.
|
|
******************************************************************************/
|
|
|
|
// Helper macro to reverse Simd vector. Fixme: slow, generic implementation.
|
|
#define REVERSE_LS(qSite, qSiteRev, Nsimd) \
|
|
{ \
|
|
std::vector<typename SitePropagator::scalar_object> qSiteVec(Nsimd); \
|
|
extract(qSite, qSiteVec); \
|
|
for (int i = 0; i < Nsimd / 2; ++i) \
|
|
{ \
|
|
typename SitePropagator::scalar_object tmp = qSiteVec[i]; \
|
|
qSiteVec[i] = qSiteVec[Nsimd - i - 1]; \
|
|
qSiteVec[Nsimd - i - 1] = tmp; \
|
|
} \
|
|
merge(qSiteRev, qSiteVec); \
|
|
}
|
|
|
|
template <class Impl>
|
|
void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
|
PropagatorField &q_in_2,
|
|
PropagatorField &q_out,
|
|
Current curr_type,
|
|
unsigned int mu)
|
|
{
|
|
conformable(q_in_1._grid, FermionGrid());
|
|
conformable(q_in_1._grid, q_in_2._grid);
|
|
conformable(_FourDimGrid, q_out._grid);
|
|
PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid());
|
|
unsigned int LLs = q_in_1._grid->_rdimensions[0];
|
|
q_out = zero;
|
|
|
|
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
|
|
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
|
|
tmp1 = Cshift(q_in_1, mu + 1, 1);
|
|
tmp2 = Cshift(q_in_2, mu + 1, 1);
|
|
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
|
|
{
|
|
unsigned int sF1 = sU * LLs;
|
|
unsigned int sF2 = (sU + 1) * LLs - 1;
|
|
|
|
for (unsigned int s = 0; s < LLs; ++s)
|
|
{
|
|
bool axial_sign = ((curr_type == Current::Axial) && \
|
|
(s < (LLs / 2)));
|
|
SitePropagator qSite2, qmuSite2;
|
|
|
|
// If vectorised in 5th dimension, reverse q2 vector to match up
|
|
// sites correctly.
|
|
if (Impl::LsVectorised)
|
|
{
|
|
REVERSE_LS(q_in_2._odata[sF2], qSite2, Ls / LLs);
|
|
REVERSE_LS(tmp2._odata[sF2], qmuSite2, Ls / LLs);
|
|
}
|
|
else
|
|
{
|
|
qSite2 = q_in_2._odata[sF2];
|
|
qmuSite2 = tmp2._odata[sF2];
|
|
}
|
|
Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sF1],
|
|
qSite2,
|
|
q_out._odata[sU],
|
|
Umu, sU, mu, axial_sign);
|
|
Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sF1],
|
|
qmuSite2,
|
|
q_out._odata[sU],
|
|
Umu, sU, mu, axial_sign);
|
|
sF1++;
|
|
sF2--;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
template <class Impl>
|
|
void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|
PropagatorField &q_out,
|
|
Current curr_type,
|
|
unsigned int mu,
|
|
unsigned int tmin,
|
|
unsigned int tmax,
|
|
ComplexField &lattice_cmplx)
|
|
{
|
|
conformable(q_in._grid, FermionGrid());
|
|
conformable(q_in._grid, q_out._grid);
|
|
PropagatorField tmp(GaugeGrid()),tmp2(GaugeGrid());
|
|
unsigned int tshift = (mu == Tp) ? 1 : 0;
|
|
unsigned int LLs = q_in._grid->_rdimensions[0];
|
|
unsigned int LLt = GridDefaultLatt()[Tp];
|
|
|
|
q_out = zero;
|
|
LatticeInteger coords(_FourDimGrid);
|
|
LatticeCoordinate(coords, Tp);
|
|
|
|
|
|
for (unsigned int s = 0; s < LLs; ++s)
|
|
{
|
|
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
|
bool tadpole_sign = (curr_type == Current::Tadpole);
|
|
bool switch_sgn = tadpole_sign || axial_sign;
|
|
|
|
|
|
//forward direction: Need q(x + mu, s)*A(x)
|
|
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
|
|
tmp = Cshift(tmp2, mu, 1); //q(x+mu,s)
|
|
tmp2 = tmp*lattice_cmplx; //q(x+mu,s)*A(x)
|
|
|
|
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
|
|
{
|
|
// Compute the sequential conserved current insertion only if our simd
|
|
// object contains a timeslice we need.
|
|
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
|
|
(coords._odata[sU] <= tmax));
|
|
Integer timeSlices = Reduce(t_mask);
|
|
|
|
if (timeSlices > 0)
|
|
{
|
|
unsigned int sF = sU * LLs + s;
|
|
Kernels::SeqConservedCurrentSiteFwd(tmp2._odata[sU],
|
|
q_out._odata[sF], Umu, sU,
|
|
mu, t_mask, switch_sgn);
|
|
}
|
|
|
|
}
|
|
|
|
//backward direction: Need q(x - mu, s)*A(x-mu)
|
|
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
|
|
tmp = lattice_cmplx*tmp2; //q(x,s)*A(x)
|
|
tmp2 = Cshift(tmp, mu, -1); //q(x-mu,s)*A(x-mu,s)
|
|
|
|
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
|
|
{
|
|
vInteger t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
|
|
(coords._odata[sU] <= (tmax + tshift)));
|
|
|
|
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
|
unsigned int t0 = 0;
|
|
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 ));
|
|
|
|
Integer timeSlices = Reduce(t_mask);
|
|
|
|
if (timeSlices > 0)
|
|
{
|
|
unsigned int sF = sU * LLs + s;
|
|
Kernels::SeqConservedCurrentSiteBwd(tmp2._odata[sU],
|
|
q_out._odata[sF], Umu, sU,
|
|
mu, t_mask, axial_sign);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
FermOpTemplateInstantiate(WilsonFermion5D);
|
|
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
|
|
|
}}
|
|
|
|
|
|
|