mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Updates in tests to make all of Grid compile
This commit is contained in:
@ -157,7 +157,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
Nblock=8;
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
X.checkerboard = B.checkerboard;
|
||||
X.Checkerboard() = B.Checkerboard();
|
||||
conformable(X, B);
|
||||
|
||||
Field tmp(B);
|
||||
@ -336,7 +336,7 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
|
||||
|
||||
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
Psi.checkerboard = Src.checkerboard;
|
||||
Psi.Checkerboard() = Src.Checkerboard();
|
||||
conformable(Psi, Src);
|
||||
|
||||
Field P(Src);
|
||||
@ -515,7 +515,7 @@ void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
X[b].checkerboard = B[b].checkerboard;
|
||||
X[b].Checkerboard() = B[b].Checkerboard();
|
||||
conformable(X[b], B[b]);
|
||||
conformable(X[b], X[0]);
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*************************************************************************************
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
@ -24,197 +24,198 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_COMPARISON_H
|
||||
#define GRID_COMPARISON_H
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////
|
||||
// This implementation is a bit poor.
|
||||
//
|
||||
// Only support relational logical operations (<, > etc)
|
||||
// on scalar objects. Therefore can strip any tensor structures.
|
||||
//
|
||||
// Should guard this with isGridTensor<> enable if?
|
||||
/////////////////////////////////////////
|
||||
//
|
||||
// Generic list of functors
|
||||
//
|
||||
template<class lobj,class robj> class veq {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vne {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vlt {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vle {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vgt {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vge {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
/////////////////////////////////////////
|
||||
// This implementation is a bit poor.
|
||||
//
|
||||
// Only support relational logical operations (<, > etc)
|
||||
// on scalar objects. Therefore can strip any tensor structures.
|
||||
//
|
||||
// Should guard this with isGridTensor<> enable if?
|
||||
/////////////////////////////////////////
|
||||
//
|
||||
// Generic list of functors
|
||||
//
|
||||
template<class lobj,class robj> class veq {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vne {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vlt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vle {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vgt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vge {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
// Generic list of functors
|
||||
template<class lobj,class robj> class seq {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sne {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class slt {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sle {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sgt {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sge {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
// Generic list of functors
|
||||
template<class lobj,class robj> class seq {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sne {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class slt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sle {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sgt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sge {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Integer and real get extra relational functions.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<scalar> vrhs(vsimd::Nsimd());
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Integer and real get extra relational functions.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<scalar> vrhs(vsimd::Nsimd());
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],rhs);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],rhs);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(lhs,vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(lhs,vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0> \
|
||||
accelerator_inline vInteger operator op (const vsimd & lhs, const vsimd & rhs) \
|
||||
{ \
|
||||
typedef typename vsimd::scalar_type scalar; \
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
|
||||
} \
|
||||
template<class vsimd,IfSimd<vsimd> = 0> \
|
||||
accelerator_inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
|
||||
{ \
|
||||
typedef typename vsimd::scalar_type scalar; \
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
|
||||
} \
|
||||
template<class vsimd,IfSimd<vsimd> = 0> \
|
||||
accelerator_inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
|
||||
{ \
|
||||
typedef typename vsimd::scalar_type scalar; \
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
|
||||
} \
|
||||
template<class vsimd> \
|
||||
accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
} \
|
||||
template<class vsimd> \
|
||||
accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
return lhs._internal op rhs; \
|
||||
} \
|
||||
template<class vsimd> \
|
||||
accelerator_inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
return lhs._internal op rhs; \
|
||||
} \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) DECLARE_RELATIONAL_EQ(op,functor)
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
}
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
@ -228,4 +229,4 @@ DECLARE_RELATIONAL(!=,sne);
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -477,7 +477,7 @@ static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Ortho
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs.Grid()->GlobalDimensions()[Orthog];
|
||||
Vector<ComplexD> ip(Nblock);
|
||||
std::vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
@ -586,6 +586,10 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
auto X_v=X.View();
|
||||
auto Y_v=Y.View();
|
||||
auto R_v=R.View();
|
||||
thread_region
|
||||
{
|
||||
Vector<vobj> s_x(Nblock);
|
||||
@ -595,16 +599,16 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y[o+i*ostride];
|
||||
dot = Y_v[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
@ -635,6 +639,8 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
auto R_v = R.View();
|
||||
auto X_v = X.View();
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
@ -645,7 +651,7 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
@ -654,7 +660,7 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
@ -692,6 +698,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
|
||||
auto lhs_v=lhs.View();
|
||||
auto rhs_v=rhs.View();
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> Left(Nblock);
|
||||
@ -704,8 +712,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs[o+i*ostride];
|
||||
Right[i] = rhs[o+i*ostride];
|
||||
Left [i] = lhs_v[o+i*ostride];
|
||||
Right[i] = rhs_v[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
|
@ -63,7 +63,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
}
|
||||
|
||||
virtual void Meooe(const FermionField &in, FermionField &out) {
|
||||
if (in.checkerboard == Odd) {
|
||||
if (in.Checkerboard() == Odd) {
|
||||
this->DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
this->DhopOE(in, out, DaggerNo);
|
||||
@ -71,7 +71,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
}
|
||||
|
||||
virtual void MeooeDag(const FermionField &in, FermionField &out) {
|
||||
if (in.checkerboard == Odd) {
|
||||
if (in.Checkerboard() == Odd) {
|
||||
this->DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
this->DhopOE(in, out, DaggerYes);
|
||||
@ -80,7 +80,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
|
||||
// allow override for twisted mass and clover
|
||||
virtual void Mooee(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
@ -90,7 +90,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
}
|
||||
|
||||
virtual void MooeeDag(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD b(0.0,-this->mu[s]);
|
||||
@ -121,9 +121,9 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
}
|
||||
|
||||
virtual RealD M(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
this->Dhop(in, out, DaggerNo);
|
||||
FermionField tmp(out._grid);
|
||||
FermionField tmp(out.Grid());
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD b(0.0,this->mu[s]);
|
||||
|
@ -81,16 +81,20 @@ public:
|
||||
|
||||
virtual RealD S(const Field &p)
|
||||
{
|
||||
assert(p._grid->Nd() == Ndim);
|
||||
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
||||
assert(p.Grid()->Nd() == Ndim);
|
||||
static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements);
|
||||
phiStencil.HaloExchange(p, compressor);
|
||||
Field action(p._grid), pshift(p._grid), phisquared(p._grid);
|
||||
Field action(p.Grid()), pshift(p.Grid()), phisquared(p.Grid());
|
||||
phisquared = p * p;
|
||||
action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
|
||||
|
||||
|
||||
auto p_v = p.View();
|
||||
auto action_v = action.View();
|
||||
for (int mu = 0; mu < Ndim; mu++)
|
||||
{
|
||||
// pshift = Cshift(p, mu, +1); // not efficient, implement with stencils
|
||||
parallel_for(int i = 0; i < p._grid->oSites(); i++)
|
||||
parallel_for(int i = 0; i < p.Grid()->oSites(); i++)
|
||||
{
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
@ -98,23 +102,20 @@ public:
|
||||
const vobj *temp, *t_p;
|
||||
|
||||
SE = phiStencil.GetEntry(permute_type, mu, i);
|
||||
t_p = &p._odata[i];
|
||||
t_p = &p_v[i];
|
||||
if (SE->_is_local)
|
||||
{
|
||||
temp = &p._odata[SE->_offset];
|
||||
if (SE->_permute)
|
||||
{
|
||||
temp = &p_v[SE->_offset];
|
||||
if (SE->_permute) {
|
||||
permute(temp2, *temp, permute_type);
|
||||
action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2;
|
||||
}
|
||||
else
|
||||
{
|
||||
action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
|
||||
action_v[i] -= temp2 * (*t_p) + (*t_p) * temp2;
|
||||
} else {
|
||||
action_v[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
|
||||
action_v[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
}
|
||||
// action -= pshift*p + p*pshift;
|
||||
@ -127,12 +128,12 @@ public:
|
||||
virtual void deriv(const Field &p, Field &force)
|
||||
{
|
||||
double t0 = usecond();
|
||||
assert(p._grid->Nd() == Ndim);
|
||||
assert(p.Grid()->Nd() == Ndim);
|
||||
force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p;
|
||||
double interm_t = usecond();
|
||||
|
||||
// move this outside
|
||||
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
||||
static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements);
|
||||
|
||||
phiStencil.HaloExchange(p, compressor);
|
||||
double halo_t = usecond();
|
||||
@ -145,59 +146,51 @@ public:
|
||||
for (int point = 0; point < npoint; point++)
|
||||
{
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
const vobj *temp;
|
||||
auto p_v = p.View();
|
||||
auto force_v = force.View();
|
||||
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
const vobj *temp;
|
||||
|
||||
#pragma omp for schedule(static, chunk)
|
||||
for (int i = 0; i < p._grid->oSites(); i++)
|
||||
{
|
||||
SE = phiStencil.GetEntry(permute_type, point, i);
|
||||
// prefetch next p?
|
||||
|
||||
if (SE->_is_local)
|
||||
{
|
||||
temp = &p._odata[SE->_offset];
|
||||
|
||||
if (SE->_permute)
|
||||
{
|
||||
parallel_for (int i = 0; i < p.Grid()->oSites(); i++) {
|
||||
|
||||
SE = phiStencil.GetEntry(permute_type, point, i);
|
||||
// prefetch next p?
|
||||
|
||||
if (SE->_is_local) {
|
||||
temp = &p_v[SE->_offset];
|
||||
|
||||
if (SE->_permute) {
|
||||
vobj temp2;
|
||||
permute(temp2, *temp, permute_type);
|
||||
force._odata[i] -= temp2;
|
||||
force_v[i] -= temp2;
|
||||
} else {
|
||||
force_v[i] -= *temp; // slow part. Dominated by this read/write (BW)
|
||||
}
|
||||
else
|
||||
{
|
||||
force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW)
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
|
||||
} else {
|
||||
force_v[i] -= phiStencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
force *= N / g;
|
||||
force *= N / g;
|
||||
|
||||
double t1 = usecond();
|
||||
double total_time = (t1 - t0) / 1e6;
|
||||
double interm_time = (interm_t - t0) / 1e6;
|
||||
double halo_time = (halo_t - interm_t) / 1e6;
|
||||
double stencil_time = (t1 - halo_t) / 1e6;
|
||||
std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl;
|
||||
double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2);
|
||||
double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2);
|
||||
double Gflops = flops / (total_time * 1e9);
|
||||
double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
|
||||
std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl;
|
||||
std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl;
|
||||
}
|
||||
double t1 = usecond();
|
||||
double total_time = (t1 - t0) / 1e6;
|
||||
double interm_time = (interm_t - t0) / 1e6;
|
||||
double halo_time = (halo_t - interm_t) / 1e6;
|
||||
double stencil_time = (t1 - halo_t) / 1e6;
|
||||
std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl;
|
||||
double flops = p.Grid()->gSites() * (14 * N * N * N + 18 * N * N + 2);
|
||||
double flops_no_stencil = p.Grid()->gSites() * (14 * N * N * N + 6 * N * N + 2);
|
||||
double Gflops = flops / (total_time * 1e9);
|
||||
double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
|
||||
std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl;
|
||||
std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -73,7 +73,7 @@ public:
|
||||
if ((traj % Params.saveInterval) == 0) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
GridBase *grid = U._grid;
|
||||
GridBase *grid = U.Grid();
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
IldgWriter _IldgWriter(grid->IsBoss());
|
||||
|
@ -75,7 +75,7 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
|
||||
if ((traj % Params.saveInterval) == 0) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
GridBase *grid = U._grid;
|
||||
GridBase *grid = U.Grid();
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
ScidacWriter _ScidacWriter(grid->IsBoss());
|
||||
|
@ -128,12 +128,12 @@ public:
|
||||
// average over all x,y,z the temporal loop
|
||||
//////////////////////////////////////////////////
|
||||
static ComplexD avgPolyakovLoop(const GaugeField &Umu) { //assume Nd=4
|
||||
GaugeMat Ut(Umu._grid), P(Umu._grid);
|
||||
GaugeMat Ut(Umu.Grid()), P(Umu.Grid());
|
||||
ComplexD out;
|
||||
int T = Umu._grid->GlobalDimensions()[3];
|
||||
int X = Umu._grid->GlobalDimensions()[0];
|
||||
int Y = Umu._grid->GlobalDimensions()[1];
|
||||
int Z = Umu._grid->GlobalDimensions()[2];
|
||||
int T = Umu.Grid()->GlobalDimensions()[3];
|
||||
int X = Umu.Grid()->GlobalDimensions()[0];
|
||||
int Y = Umu.Grid()->GlobalDimensions()[1];
|
||||
int Z = Umu.Grid()->GlobalDimensions()[2];
|
||||
|
||||
Ut = peekLorentz(Umu,3); //Select temporal direction
|
||||
P = Ut;
|
||||
|
@ -55,13 +55,13 @@ LOGICAL_BINOP(||);
|
||||
LOGICAL_BINOP(&&);
|
||||
|
||||
template <class T>
|
||||
strong_inline bool operator==(const iScalar<T> &t1, const iScalar<T> &t2)
|
||||
accelerator_inline bool operator==(const iScalar<T> &t1, const iScalar<T> &t2)
|
||||
{
|
||||
return (t1._internal == t2._internal);
|
||||
}
|
||||
|
||||
template <class T, int N>
|
||||
strong_inline bool operator==(const iVector<T, N> &t1, const iVector<T, N> &t2)
|
||||
accelerator_inline bool operator==(const iVector<T, N> &t1, const iVector<T, N> &t2)
|
||||
{
|
||||
bool res = true;
|
||||
|
||||
@ -74,7 +74,7 @@ strong_inline bool operator==(const iVector<T, N> &t1, const iVector<T, N> &t2)
|
||||
}
|
||||
|
||||
template <class T, int N>
|
||||
strong_inline bool operator==(const iMatrix<T, N> &t1, const iMatrix<T, N> &t2)
|
||||
accelerator_inline bool operator==(const iMatrix<T, N> &t1, const iMatrix<T, N> &t2)
|
||||
{
|
||||
bool res = true;
|
||||
|
||||
|
Reference in New Issue
Block a user