mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 11:15:55 +01:00
Merge branch 'develop' into feature/gparity_HMC
This commit is contained in:
commit
752f70cd48
@ -122,8 +122,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
||||||
cshiftVector<vobj> send_buf(buffer_size);
|
static cshiftVector<vobj> send_buf; send_buf.resize(buffer_size);
|
||||||
cshiftVector<vobj> recv_buf(buffer_size);
|
static cshiftVector<vobj> recv_buf; recv_buf.resize(buffer_size);
|
||||||
|
|
||||||
int cb= (cbmask==0x2)? Odd : Even;
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||||
@ -198,8 +198,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||||
// int words = sizeof(vobj)/sizeof(vector_type);
|
// int words = sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
std::vector<cshiftVector<scalar_object> > send_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
|
||||||
std::vector<cshiftVector<scalar_object> > recv_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
|
||||||
scalar_object * recv_buf_extract_mpi;
|
scalar_object * recv_buf_extract_mpi;
|
||||||
scalar_object * send_buf_extract_mpi;
|
scalar_object * send_buf_extract_mpi;
|
||||||
|
|
||||||
@ -294,8 +294,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
||||||
cshiftVector<vobj> send_buf_v(buffer_size);
|
static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
|
||||||
cshiftVector<vobj> recv_buf_v(buffer_size);
|
static cshiftVector<vobj> recv_buf_v; recv_buf_v.resize(buffer_size);
|
||||||
vobj *send_buf;
|
vobj *send_buf;
|
||||||
vobj *recv_buf;
|
vobj *recv_buf;
|
||||||
{
|
{
|
||||||
@ -381,8 +381,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||||
// int words = sizeof(vobj)/sizeof(vector_type);
|
// int words = sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
std::vector<cshiftVector<scalar_object> > send_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
|
||||||
std::vector<cshiftVector<scalar_object> > recv_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
|
||||||
scalar_object * recv_buf_extract_mpi;
|
scalar_object * recv_buf_extract_mpi;
|
||||||
scalar_object * send_buf_extract_mpi;
|
scalar_object * send_buf_extract_mpi;
|
||||||
{
|
{
|
||||||
|
@ -128,7 +128,7 @@ inline void MachineCharacteristics(FieldMetaData &header)
|
|||||||
std::time_t t = std::time(nullptr);
|
std::time_t t = std::time(nullptr);
|
||||||
std::tm tm_ = *std::localtime(&t);
|
std::tm tm_ = *std::localtime(&t);
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
// oss << std::put_time(&tm_, "%c %Z");
|
oss << std::put_time(&tm_, "%c %Z");
|
||||||
header.creation_date = oss.str();
|
header.creation_date = oss.str();
|
||||||
header.archive_date = header.creation_date;
|
header.archive_date = header.creation_date;
|
||||||
|
|
||||||
|
@ -236,7 +236,10 @@ public:
|
|||||||
//Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
|
//Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
|
||||||
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
||||||
for(int mu=0;mu<Nd-1;mu++){
|
for(int mu=0;mu<Nd-1;mu++){
|
||||||
LatticeCoordinate(coor,mu);
|
|
||||||
|
if( Params.twists[mu] ){
|
||||||
|
LatticeCoordinate(coor,mu);
|
||||||
|
}
|
||||||
|
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
Uconj = conjugate(U);
|
Uconj = conjugate(U);
|
||||||
@ -256,7 +259,7 @@ public:
|
|||||||
thread_foreach(ss,U_v,{
|
thread_foreach(ss,U_v,{
|
||||||
Uds_v[ss](0)(mu) = U_v[ss]();
|
Uds_v[ss](0)(mu) = U_v[ss]();
|
||||||
Uds_v[ss](1)(mu) = Uconj_v[ss]();
|
Uds_v[ss](1)(mu) = Uconj_v[ss]();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||||
@ -362,6 +365,13 @@ public:
|
|||||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||||
|
|
||||||
{
|
{
|
||||||
|
GridBase *GaugeGrid = mat.Grid();
|
||||||
|
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||||
|
|
||||||
|
if( Params.twists[mu] ){
|
||||||
|
LatticeCoordinate(coor,mu);
|
||||||
|
}
|
||||||
|
|
||||||
autoView( mat_v , mat, AcceleratorWrite);
|
autoView( mat_v , mat, AcceleratorWrite);
|
||||||
autoView( Btilde_v , Btilde, AcceleratorRead);
|
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||||
autoView( Atilde_v , Atilde, AcceleratorRead);
|
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||||
@ -373,7 +383,7 @@ public:
|
|||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
int sF = s+Ls*sU;
|
int sF = s+Ls*sU;
|
||||||
for(int spn=0;spn<Ns;spn++){ //sum over spin
|
for(int spn=0;spn<Ns;spn++){ //sum over spin
|
||||||
//Flavor 0
|
//Flavor 0
|
||||||
auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
|
auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
|
||||||
auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
|
auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
|
||||||
sum = sum + outerProduct(bb,aa);
|
sum = sum + outerProduct(bb,aa);
|
||||||
|
@ -72,19 +72,23 @@ public:
|
|||||||
|
|
||||||
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
static accelerator_inline void multLink(SiteSpinor &phi,
|
template<class _Spinor>
|
||||||
|
static accelerator_inline void multLink(_Spinor &phi,
|
||||||
const SiteDoubledGaugeField &U,
|
const SiteDoubledGaugeField &U,
|
||||||
const SiteSpinor &chi,
|
const _Spinor &chi,
|
||||||
int mu)
|
int mu)
|
||||||
{
|
{
|
||||||
mult(&phi(), &U(mu), &chi());
|
auto UU = coalescedRead(U(mu));
|
||||||
|
mult(&phi(), &UU, &chi());
|
||||||
}
|
}
|
||||||
static accelerator_inline void multLinkAdd(SiteSpinor &phi,
|
template<class _Spinor>
|
||||||
|
static accelerator_inline void multLinkAdd(_Spinor &phi,
|
||||||
const SiteDoubledGaugeField &U,
|
const SiteDoubledGaugeField &U,
|
||||||
const SiteSpinor &chi,
|
const _Spinor &chi,
|
||||||
int mu)
|
int mu)
|
||||||
{
|
{
|
||||||
mac(&phi(), &U(mu), &chi());
|
auto UU = coalescedRead(U(mu));
|
||||||
|
mac(&phi(), &UU, &chi());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class ref>
|
template <class ref>
|
||||||
|
@ -184,18 +184,22 @@ public:
|
|||||||
mat = TraceIndex<SpinIndex>(P);
|
mat = TraceIndex<SpinIndex>(P);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
|
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
|
||||||
|
{
|
||||||
for (int mu = 0; mu < Nd; mu++)
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
|
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu)
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
{
|
||||||
|
#undef USE_OLD_INSERT_FORCE
|
||||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||||
|
autoView( mat_v , mat, AcceleratorWrite);
|
||||||
|
#ifdef USE_OLD_INSERT_FORCE
|
||||||
GaugeLinkField tmp(mat.Grid());
|
GaugeLinkField tmp(mat.Grid());
|
||||||
tmp = Zero();
|
tmp = Zero();
|
||||||
{
|
{
|
||||||
|
const int Nsimd = SiteSpinor::Nsimd();
|
||||||
autoView( tmp_v , tmp, AcceleratorWrite);
|
autoView( tmp_v , tmp, AcceleratorWrite);
|
||||||
autoView( Btilde_v , Btilde, AcceleratorRead);
|
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||||
autoView( Atilde_v , Atilde, AcceleratorRead);
|
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||||
@ -208,6 +212,29 @@ public:
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
const int Nsimd = SiteSpinor::Nsimd();
|
||||||
|
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||||
|
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||||
|
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
|
||||||
|
int sU=sss;
|
||||||
|
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
|
||||||
|
ColorMatrixType sum;
|
||||||
|
zeroit(sum);
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
int sF = s+Ls*sU;
|
||||||
|
for(int spn=0;spn<Ns;spn++){ //sum over spin
|
||||||
|
auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
|
||||||
|
auto aa = coalescedRead(Atilde_v[sF]()(spn) );
|
||||||
|
auto op = outerProduct(bb,aa);
|
||||||
|
sum = sum + op;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
coalescedWrite(mat_v[sU](mu)(), sum);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -880,11 +880,23 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<RealD> G_s(Ls,1.0);
|
std::vector<RealD> G_s(Ls,1.0);
|
||||||
|
Integer sign = 1; // sign flip for vector/tadpole
|
||||||
if ( curr_type == Current::Axial ) {
|
if ( curr_type == Current::Axial ) {
|
||||||
for(int s=0;s<Ls/2;s++){
|
for(int s=0;s<Ls/2;s++){
|
||||||
G_s[s] = -1.0;
|
G_s[s] = -1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if ( curr_type == Current::Tadpole ) {
|
||||||
|
auto b=this->_b;
|
||||||
|
auto c=this->_c;
|
||||||
|
if ( b == 1 && c == 0 ) {
|
||||||
|
sign = -1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Error: Tadpole implementation currently unavailable for non-Shamir actions." << std::endl;
|
||||||
|
assert(b==1 && c==0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
|
|
||||||
@ -907,7 +919,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
|
|
||||||
tmp = Cshift(tmp,mu,1);
|
tmp = Cshift(tmp,mu,1);
|
||||||
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
|
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
|
||||||
tmp = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
|
tmp = sign*G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
|
||||||
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
|
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
|
||||||
L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated
|
L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated
|
||||||
|
|
||||||
|
@ -35,39 +35,32 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
|
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
|
||||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
if (SE->_permute) { \
|
int perm= SE->_permute; \
|
||||||
chi_p = &chi; \
|
chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
|
||||||
permute(chi, in[SE->_offset], ptype); \
|
|
||||||
} else { \
|
|
||||||
chi_p = &in[SE->_offset]; \
|
|
||||||
} \
|
|
||||||
} else { \
|
} else { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
} \
|
} \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir);
|
acceleratorSynchronise(); \
|
||||||
|
multLink(Uchi, U[sU], chi, Dir);
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
|
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
|
||||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
if (SE->_permute) { \
|
int perm= SE->_permute; \
|
||||||
chi_p = &chi; \
|
chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
|
||||||
permute(chi, in[SE->_offset], ptype); \
|
|
||||||
} else { \
|
|
||||||
chi_p = &in[SE->_offset]; \
|
|
||||||
} \
|
|
||||||
} else if ( st.same_node[Dir] ) { \
|
} else if ( st.same_node[Dir] ) { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
} \
|
} \
|
||||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir); \
|
multLink(Uchi, U[sU], chi, Dir); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
||||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||||
nmu++; \
|
nmu++; \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir); \
|
multLink(Uchi, U[sU], chi, Dir); \
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -84,12 +77,14 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
|||||||
SiteSpinor *buf, int sF, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out, int dag)
|
const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||||
{
|
{
|
||||||
const SiteSpinor *chi_p;
|
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||||
SiteSpinor chi;
|
calcSpinor chi;
|
||||||
SiteSpinor Uchi;
|
calcSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int skew;
|
int skew;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
|
|
||||||
// for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
//
|
//
|
||||||
@ -118,7 +113,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
|||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
Uchi = - Uchi;
|
Uchi = - Uchi;
|
||||||
}
|
}
|
||||||
vstream(out[sF], Uchi);
|
coalescedWrite(out[sF], Uchi,lane);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -130,13 +125,16 @@ template <int Naik> accelerator_inline
|
|||||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int sF, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
const SiteSpinor *chi_p;
|
{
|
||||||
SiteSpinor chi;
|
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||||
SiteSpinor Uchi;
|
calcSpinor chi;
|
||||||
|
calcSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int skew ;
|
int skew ;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
|
|
||||||
// for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
// int sF=LLs*sU+s;
|
// int sF=LLs*sU+s;
|
||||||
@ -165,7 +163,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
|||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
Uchi = - Uchi;
|
Uchi = - Uchi;
|
||||||
}
|
}
|
||||||
vstream(out[sF], Uchi);
|
coalescedWrite(out[sF], Uchi,lane);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -178,14 +176,17 @@ template <int Naik> accelerator_inline
|
|||||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int sF, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
const SiteSpinor *chi_p;
|
{
|
||||||
// SiteSpinor chi;
|
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||||
SiteSpinor Uchi;
|
calcSpinor chi;
|
||||||
|
calcSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
int skew ;
|
int skew ;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
|
|
||||||
// for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
// int sF=LLs*sU+s;
|
// int sF=LLs*sU+s;
|
||||||
@ -212,10 +213,11 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
|||||||
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||||
}
|
}
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
|
auto _out = coalescedRead(out[sF],lane);
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
out[sF] = out[sF] - Uchi;
|
coalescedWrite(out[sF], _out-Uchi,lane);
|
||||||
} else {
|
} else {
|
||||||
out[sF] = out[sF] + Uchi;
|
coalescedWrite(out[sF], _out+Uchi,lane);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -261,6 +263,8 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GridBase *FGrid=in.Grid();
|
GridBase *FGrid=in.Grid();
|
||||||
GridBase *UGrid=U.Grid();
|
GridBase *UGrid=U.Grid();
|
||||||
typedef StaggeredKernels<Impl> ThisKernel;
|
typedef StaggeredKernels<Impl> ThisKernel;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
autoView( UUU_v , UUU, AcceleratorRead);
|
autoView( UUU_v , UUU, AcceleratorRead);
|
||||||
autoView( U_v , U, AcceleratorRead);
|
autoView( U_v , U, AcceleratorRead);
|
||||||
autoView( in_v , in, AcceleratorRead);
|
autoView( in_v , in, AcceleratorRead);
|
||||||
@ -301,6 +305,8 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GridBase *FGrid=in.Grid();
|
GridBase *FGrid=in.Grid();
|
||||||
GridBase *UGrid=U.Grid();
|
GridBase *UGrid=U.Grid();
|
||||||
typedef StaggeredKernels<Impl> ThisKernel;
|
typedef StaggeredKernels<Impl> ThisKernel;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
autoView( UUU_v , U, AcceleratorRead);
|
autoView( UUU_v , U, AcceleratorRead);
|
||||||
autoView( U_v , U, AcceleratorRead);
|
autoView( U_v , U, AcceleratorRead);
|
||||||
autoView( in_v , in, AcceleratorRead);
|
autoView( in_v , in, AcceleratorRead);
|
||||||
|
@ -34,6 +34,16 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// outerProduct Scalar x Scalar -> Scalar
|
// outerProduct Scalar x Scalar -> Scalar
|
||||||
// Vector x Vector -> Matrix
|
// Vector x Vector -> Matrix
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<class CC,IfComplex<CC> = 0>
|
||||||
|
accelerator_inline CC outerProduct(const CC &l, const CC& r)
|
||||||
|
{
|
||||||
|
return l*conj(r);
|
||||||
|
}
|
||||||
|
template<class RR,IfReal<RR> = 0>
|
||||||
|
accelerator_inline RR outerProduct(const RR &l, const RR& r)
|
||||||
|
{
|
||||||
|
return l*r;
|
||||||
|
}
|
||||||
|
|
||||||
template<class CC,IfComplex<CC> = 0>
|
template<class CC,IfComplex<CC> = 0>
|
||||||
accelerator_inline CC outerProduct(const CC &l, const CC& r)
|
accelerator_inline CC outerProduct(const CC &l, const CC& r)
|
||||||
@ -68,7 +78,6 @@ auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<declt
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -29,7 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -59,6 +58,10 @@ int main (int argc, char ** argv)
|
|||||||
double beta = 1.0;
|
double beta = 1.0;
|
||||||
double c1 = 0.331;
|
double c1 = 0.331;
|
||||||
|
|
||||||
|
const int nu = 1;
|
||||||
|
std::vector<int> twists(Nd,0);
|
||||||
|
twists[nu] = 1;
|
||||||
|
ConjugateGimplD::setDirections(twists);
|
||||||
ConjugatePlaqPlusRectangleActionR Action(beta,c1);
|
ConjugatePlaqPlusRectangleActionR Action(beta,c1);
|
||||||
//ConjugateWilsonGaugeActionR Action(beta);
|
//ConjugateWilsonGaugeActionR Action(beta);
|
||||||
//WilsonGaugeActionR Action(beta);
|
//WilsonGaugeActionR Action(beta);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user