1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 20:57:06 +01:00

Updates in tests to make all of Grid compile

This commit is contained in:
Peter Boyle
2018-12-14 16:55:54 +00:00
parent afc462bd58
commit 422764757d
26 changed files with 388 additions and 399 deletions

View File

@ -157,7 +157,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
Nblock=8;
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard;
X.Checkerboard() = B.Checkerboard();
conformable(X, B);
Field tmp(B);
@ -336,7 +336,7 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
Psi.Checkerboard() = Src.Checkerboard();
conformable(Psi, Src);
Field P(Src);
@ -515,7 +515,7 @@ void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
for(int b=0;b<Nblock;b++){
X[b].checkerboard = B[b].checkerboard;
X[b].Checkerboard() = B[b].Checkerboard();
conformable(X[b], B[b]);
conformable(X[b], X[0]);
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -24,197 +24,198 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_COMPARISON_H
#define GRID_COMPARISON_H
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////
// This implementation is a bit poor.
//
// Only support relational logical operations (<, > etc)
// on scalar objects. Therefore can strip any tensor structures.
//
// Should guard this with isGridTensor<> enable if?
/////////////////////////////////////////
//
// Generic list of functors
//
template<class lobj,class robj> class veq {
public:
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) == (rhs);
}
};
template<class lobj,class robj> class vne {
public:
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) != (rhs);
}
};
template<class lobj,class robj> class vlt {
public:
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) < (rhs);
}
};
template<class lobj,class robj> class vle {
public:
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) <= (rhs);
}
};
template<class lobj,class robj> class vgt {
public:
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) > (rhs);
}
};
template<class lobj,class robj> class vge {
public:
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) >= (rhs);
}
};
/////////////////////////////////////////
// This implementation is a bit poor.
//
// Only support relational logical operations (<, > etc)
// on scalar objects. Therefore can strip any tensor structures.
//
// Should guard this with isGridTensor<> enable if?
/////////////////////////////////////////
//
// Generic list of functors
//
template<class lobj,class robj> class veq {
public:
vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) == (rhs);
}
};
template<class lobj,class robj> class vne {
public:
vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) != (rhs);
}
};
template<class lobj,class robj> class vlt {
public:
vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) < (rhs);
}
};
template<class lobj,class robj> class vle {
public:
vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) <= (rhs);
}
};
template<class lobj,class robj> class vgt {
public:
vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) > (rhs);
}
};
template<class lobj,class robj> class vge {
public:
vInteger operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) >= (rhs);
}
};
// Generic list of functors
template<class lobj,class robj> class seq {
public:
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) == (rhs);
}
};
template<class lobj,class robj> class sne {
public:
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) != (rhs);
}
};
template<class lobj,class robj> class slt {
public:
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) < (rhs);
}
};
template<class lobj,class robj> class sle {
public:
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) <= (rhs);
}
};
template<class lobj,class robj> class sgt {
public:
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) > (rhs);
}
};
template<class lobj,class robj> class sge {
public:
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) >= (rhs);
}
};
// Generic list of functors
template<class lobj,class robj> class seq {
public:
Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) == (rhs);
}
};
template<class lobj,class robj> class sne {
public:
Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) != (rhs);
}
};
template<class lobj,class robj> class slt {
public:
Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) < (rhs);
}
};
template<class lobj,class robj> class sle {
public:
Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) <= (rhs);
}
};
template<class lobj,class robj> class sgt {
public:
Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) > (rhs);
}
};
template<class lobj,class robj> class sge {
public:
Integer operator()(const lobj &lhs, const robj &rhs)
{
return (lhs) >= (rhs);
}
};
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Integer and real get extra relational functions.
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
{
typedef typename vsimd::scalar_type scalar;
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
ExtractBuffer<scalar> vrhs(vsimd::Nsimd());
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
vInteger ret;
extract<vsimd,scalar>(lhs,vlhs);
extract<vsimd,scalar>(rhs,vrhs);
for(int s=0;s<vsimd::Nsimd();s++){
vpred[s] = sop(vlhs[s],vrhs[s]);
}
merge<vInteger,Integer>(ret,vpred);
return ret;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Integer and real get extra relational functions.
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
{
typedef typename vsimd::scalar_type scalar;
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
ExtractBuffer<scalar> vrhs(vsimd::Nsimd());
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
vInteger ret;
extract<vsimd,scalar>(lhs,vlhs);
extract<vsimd,scalar>(rhs,vrhs);
for(int s=0;s<vsimd::Nsimd();s++){
vpred[s] = sop(vlhs[s],vrhs[s]);
}
merge<vInteger,Integer>(ret,vpred);
return ret;
}
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
{
typedef typename vsimd::scalar_type scalar;
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
vInteger ret;
extract<vsimd,scalar>(lhs,vlhs);
for(int s=0;s<vsimd::Nsimd();s++){
vpred[s] = sop(vlhs[s],rhs);
}
merge<vInteger,Integer>(ret,vpred);
return ret;
}
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
{
typedef typename vsimd::scalar_type scalar;
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
vInteger ret;
extract<vsimd,scalar>(lhs,vlhs);
for(int s=0;s<vsimd::Nsimd();s++){
vpred[s] = sop(vlhs[s],rhs);
}
merge<vInteger,Integer>(ret,vpred);
return ret;
}
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
accelerator_inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
{
typedef typename vsimd::scalar_type scalar;
ExtractBuffer<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
vInteger ret;
extract<vsimd,scalar>(rhs,vrhs);
for(int s=0;s<vsimd::Nsimd();s++){
vpred[s] = sop(lhs,vrhs[s]);
}
merge<vInteger,Integer>(ret,vpred);
return ret;
}
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
{
typedef typename vsimd::scalar_type scalar;
ExtractBuffer<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
vInteger ret;
extract<vsimd,scalar>(rhs,vrhs);
for(int s=0;s<vsimd::Nsimd();s++){
vpred[s] = sop(lhs,vrhs[s]);
}
merge<vInteger,Integer>(ret,vpred);
return ret;
}
#define DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd,IfSimd<vsimd> = 0> \
accelerator_inline vInteger operator op (const vsimd & lhs, const vsimd & rhs) \
{ \
typedef typename vsimd::scalar_type scalar; \
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
} \
template<class vsimd,IfSimd<vsimd> = 0> \
accelerator_inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
{ \
typedef typename vsimd::scalar_type scalar; \
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
} \
template<class vsimd,IfSimd<vsimd> = 0> \
accelerator_inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
{ \
typedef typename vsimd::scalar_type scalar; \
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
} \
template<class vsimd> \
accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs) \
{ \
return lhs._internal op rhs._internal; \
} \
template<class vsimd> \
accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
{ \
return lhs._internal op rhs; \
} \
template<class vsimd> \
accelerator_inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
{ \
return lhs op rhs._internal; \
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
{\
typedef typename vsimd::scalar_type scalar;\
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
}\
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
{\
typedef typename vsimd::scalar_type scalar;\
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
}\
template<class vsimd,IfSimd<vsimd> = 0>\
inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
{\
typedef typename vsimd::scalar_type scalar;\
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
}\
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
{ \
return lhs._internal op rhs; \
} \
template<class vsimd>\
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
{ \
return lhs op rhs._internal; \
} \
#define DECLARE_RELATIONAL(op,functor) DECLARE_RELATIONAL_EQ(op,functor)
#define DECLARE_RELATIONAL(op,functor) \
DECLARE_RELATIONAL_EQ(op,functor) \
template<class vsimd>\
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
{ \
return lhs._internal op rhs._internal; \
}
DECLARE_RELATIONAL(<,slt);
DECLARE_RELATIONAL(<=,sle);
@ -228,4 +229,4 @@ DECLARE_RELATIONAL(!=,sne);
NAMESPACE_END(Grid);
#endif

View File

@ -477,7 +477,7 @@ static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Ortho
typedef typename vobj::vector_type vector_type;
int Nblock = rhs.Grid()->GlobalDimensions()[Orthog];
Vector<ComplexD> ip(Nblock);
std::vector<ComplexD> ip(Nblock);
sn.resize(Nblock);
sliceInnerProductVector(ip,rhs,rhs,Orthog);
@ -586,6 +586,10 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
auto X_v=X.View();
auto Y_v=Y.View();
auto R_v=R.View();
thread_region
{
Vector<vobj> s_x(Nblock);
@ -595,16 +599,16 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
s_x[i] = X[o+i*ostride];
s_x[i] = X_v[o+i*ostride];
}
vobj dot;
for(int i=0;i<Nblock;i++){
dot = Y[o+i*ostride];
dot = Y_v[o+i*ostride];
for(int j=0;j<Nblock;j++){
dot = dot + s_x[j]*(scale*aa(j,i));
}
R[o+i*ostride]=dot;
R_v[o+i*ostride]=dot;
}
}});
}
@ -635,6 +639,8 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
auto R_v = R.View();
auto X_v = X.View();
thread_region
{
std::vector<vobj> s_x(Nblock);
@ -645,7 +651,7 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
s_x[i] = X[o+i*ostride];
s_x[i] = X_v[o+i*ostride];
}
vobj dot;
@ -654,7 +660,7 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
for(int j=1;j<Nblock;j++){
dot = dot + s_x[j]*(scale*aa(j,i));
}
R[o+i*ostride]=dot;
R_v[o+i*ostride]=dot;
}
}});
}
@ -692,6 +698,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
typedef typename vobj::vector_typeD vector_typeD;
auto lhs_v=lhs.View();
auto rhs_v=rhs.View();
thread_region
{
std::vector<vobj> Left(Nblock);
@ -704,8 +712,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
Left [i] = lhs[o+i*ostride];
Right[i] = rhs[o+i*ostride];
Left [i] = lhs_v[o+i*ostride];
Right[i] = rhs_v[o+i*ostride];
}
for(int i=0;i<Nblock;i++){

View File

@ -63,7 +63,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
}
virtual void Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
this->DhopEO(in, out, DaggerNo);
} else {
this->DhopOE(in, out, DaggerNo);
@ -71,7 +71,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
}
virtual void MeooeDag(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
if (in.Checkerboard() == Odd) {
this->DhopEO(in, out, DaggerYes);
} else {
this->DhopOE(in, out, DaggerYes);
@ -80,7 +80,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
// allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
@ -90,7 +90,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
}
virtual void MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,-this->mu[s]);
@ -121,9 +121,9 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
}
virtual RealD M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out.Checkerboard() = in.Checkerboard();
this->Dhop(in, out, DaggerNo);
FermionField tmp(out._grid);
FermionField tmp(out.Grid());
for (int s=0;s<(int)this->mass.size();s++) {
ComplexD a = 4.0+this->mass[s];
ComplexD b(0.0,this->mu[s]);

View File

@ -81,16 +81,20 @@ public:
virtual RealD S(const Field &p)
{
assert(p._grid->Nd() == Ndim);
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
assert(p.Grid()->Nd() == Ndim);
static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements);
phiStencil.HaloExchange(p, compressor);
Field action(p._grid), pshift(p._grid), phisquared(p._grid);
Field action(p.Grid()), pshift(p.Grid()), phisquared(p.Grid());
phisquared = p * p;
action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
auto p_v = p.View();
auto action_v = action.View();
for (int mu = 0; mu < Ndim; mu++)
{
// pshift = Cshift(p, mu, +1); // not efficient, implement with stencils
parallel_for(int i = 0; i < p._grid->oSites(); i++)
parallel_for(int i = 0; i < p.Grid()->oSites(); i++)
{
int permute_type;
StencilEntry *SE;
@ -98,23 +102,20 @@ public:
const vobj *temp, *t_p;
SE = phiStencil.GetEntry(permute_type, mu, i);
t_p = &p._odata[i];
t_p = &p_v[i];
if (SE->_is_local)
{
temp = &p._odata[SE->_offset];
if (SE->_permute)
{
temp = &p_v[SE->_offset];
if (SE->_permute) {
permute(temp2, *temp, permute_type);
action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2;
}
else
{
action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
action_v[i] -= temp2 * (*t_p) + (*t_p) * temp2;
} else {
action_v[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
}
}
else
{
action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
action_v[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
}
}
// action -= pshift*p + p*pshift;
@ -127,12 +128,12 @@ public:
virtual void deriv(const Field &p, Field &force)
{
double t0 = usecond();
assert(p._grid->Nd() == Ndim);
assert(p.Grid()->Nd() == Ndim);
force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p;
double interm_t = usecond();
// move this outside
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements);
phiStencil.HaloExchange(p, compressor);
double halo_t = usecond();
@ -145,59 +146,51 @@ public:
for (int point = 0; point < npoint; point++)
{
#pragma omp parallel
{
int permute_type;
StencilEntry *SE;
const vobj *temp;
auto p_v = p.View();
auto force_v = force.View();
int permute_type;
StencilEntry *SE;
const vobj *temp;
#pragma omp for schedule(static, chunk)
for (int i = 0; i < p._grid->oSites(); i++)
{
SE = phiStencil.GetEntry(permute_type, point, i);
// prefetch next p?
if (SE->_is_local)
{
temp = &p._odata[SE->_offset];
if (SE->_permute)
{
parallel_for (int i = 0; i < p.Grid()->oSites(); i++) {
SE = phiStencil.GetEntry(permute_type, point, i);
// prefetch next p?
if (SE->_is_local) {
temp = &p_v[SE->_offset];
if (SE->_permute) {
vobj temp2;
permute(temp2, *temp, permute_type);
force._odata[i] -= temp2;
force_v[i] -= temp2;
} else {
force_v[i] -= *temp; // slow part. Dominated by this read/write (BW)
}
else
{
force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW)
}
}
else
{
force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
} else {
force_v[i] -= phiStencil.CommBuf()[SE->_offset];
}
}
}
}
force *= N / g;
force *= N / g;
double t1 = usecond();
double total_time = (t1 - t0) / 1e6;
double interm_time = (interm_t - t0) / 1e6;
double halo_time = (halo_t - interm_t) / 1e6;
double stencil_time = (t1 - halo_t) / 1e6;
std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl;
std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl;
std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl;
double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2);
double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2);
double Gflops = flops / (total_time * 1e9);
double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl;
std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl;
}
double t1 = usecond();
double total_time = (t1 - t0) / 1e6;
double interm_time = (interm_t - t0) / 1e6;
double halo_time = (halo_t - interm_t) / 1e6;
double stencil_time = (t1 - halo_t) / 1e6;
std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl;
std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl;
std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl;
double flops = p.Grid()->gSites() * (14 * N * N * N + 18 * N * N + 2);
double flops_no_stencil = p.Grid()->gSites() * (14 * N * N * N + 6 * N * N + 2);
double Gflops = flops / (total_time * 1e9);
double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl;
std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl;
}
};
NAMESPACE_END(Grid);

View File

@ -73,7 +73,7 @@ public:
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U._grid;
GridBase *grid = U.Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
IldgWriter _IldgWriter(grid->IsBoss());

View File

@ -75,7 +75,7 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U._grid;
GridBase *grid = U.Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
ScidacWriter _ScidacWriter(grid->IsBoss());

View File

@ -128,12 +128,12 @@ public:
// average over all x,y,z the temporal loop
//////////////////////////////////////////////////
static ComplexD avgPolyakovLoop(const GaugeField &Umu) { //assume Nd=4
GaugeMat Ut(Umu._grid), P(Umu._grid);
GaugeMat Ut(Umu.Grid()), P(Umu.Grid());
ComplexD out;
int T = Umu._grid->GlobalDimensions()[3];
int X = Umu._grid->GlobalDimensions()[0];
int Y = Umu._grid->GlobalDimensions()[1];
int Z = Umu._grid->GlobalDimensions()[2];
int T = Umu.Grid()->GlobalDimensions()[3];
int X = Umu.Grid()->GlobalDimensions()[0];
int Y = Umu.Grid()->GlobalDimensions()[1];
int Z = Umu.Grid()->GlobalDimensions()[2];
Ut = peekLorentz(Umu,3); //Select temporal direction
P = Ut;

View File

@ -55,13 +55,13 @@ LOGICAL_BINOP(||);
LOGICAL_BINOP(&&);
template <class T>
strong_inline bool operator==(const iScalar<T> &t1, const iScalar<T> &t2)
accelerator_inline bool operator==(const iScalar<T> &t1, const iScalar<T> &t2)
{
return (t1._internal == t2._internal);
}
template <class T, int N>
strong_inline bool operator==(const iVector<T, N> &t1, const iVector<T, N> &t2)
accelerator_inline bool operator==(const iVector<T, N> &t1, const iVector<T, N> &t2)
{
bool res = true;
@ -74,7 +74,7 @@ strong_inline bool operator==(const iVector<T, N> &t1, const iVector<T, N> &t2)
}
template <class T, int N>
strong_inline bool operator==(const iMatrix<T, N> &t1, const iMatrix<T, N> &t2)
accelerator_inline bool operator==(const iMatrix<T, N> &t1, const iMatrix<T, N> &t2)
{
bool res = true;