1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge remote-tracking branch 'upstream/master'

Conflicts:
	lib/math/Grid_math_tensors.h
	lib/simd/Grid_vector_types.h
This commit is contained in:
neo 2015-05-26 13:14:06 +09:00
commit 9ad6d0c65f
39 changed files with 1091 additions and 439 deletions

View File

@ -23,7 +23,7 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=16;lat+=4){
for(int lat=4;lat<=32;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
@ -94,8 +94,7 @@ int main (int argc, char ** argv)
std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=16;lat+=4){
for(int lat=4;lat<=32;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});

View File

@ -0,0 +1,11 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
void su3_test_mult_expression(LatticeColourMatrix &z, LatticeColourMatrix &x,LatticeColourMatrix &y)
{
z=x*y;
}

View File

@ -0,0 +1,11 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
void su3_test_mult_routine(LatticeColourMatrix &z, LatticeColourMatrix &x,LatticeColourMatrix &y)
{
mult(z,x,y);
}

View File

@ -24,7 +24,8 @@ int main (int argc, char ** argv)
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
int threads = GridThread::GetThreads();
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
@ -36,11 +37,11 @@ int main (int argc, char ** argv)
// pRNG.SeedFixedIntegers(seeds);
pRNG.SeedRandomDevice();
LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion src (&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
LatticeFermion ref(&Grid); ref=zero;
LatticeFermion err(&Grid);
LatticeFermion tmp(&Grid); tmp=zero;
LatticeFermion err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
@ -51,8 +52,11 @@ int main (int argc, char ** argv)
// Only one non-zero (y)
Umu=zero;
Complex cone(1.0,0.0);
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
if (nn!=0) U[nn]=zero;
else U[nn] = cone;
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
@ -78,7 +82,7 @@ int main (int argc, char ** argv)
}
RealD mass=0.1;
WilsonMatrix Dw(Umu,mass);
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
std::cout << "Calling Dw"<<std::endl;
int ncall=1000;
@ -93,7 +97,7 @@ int main (int argc, char ** argv)
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref -result;
err = ref-result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
@ -129,9 +133,8 @@ int main (int argc, char ** argv)
std::cout << "Called DwDag"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
err = ref -result;
err = ref-result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,61 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
for(int mu=0;mu<Nd;mu++){
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
}
RealD mass=0.5;
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
// HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
// ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
// CG(HermOp,src,result);
LatticeFermion src_o(&RBGrid);
LatticeFermion result_o(&RBGrid);
pickCheckerboard(Odd,src_o,src);
result_o=zero;
HermitianCheckerBoardedOperator<WilsonMatrix,LatticeFermion> HermOpEO(Dw);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
CG(HermOpEO,src_o,result_o);
Grid_finalize();
}

View File

@ -0,0 +1,48 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
LatticeFermion resid(&Grid);
RealD mass=0.5;
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
SchurRedBlackSolve<LatticeFermion> SchurSolver(CG);
SchurSolver(Dw,src,result);
Grid_finalize();
}

View File

@ -24,13 +24,14 @@ int main (int argc, char ** argv)
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
std::cout << "src norm" << norm2(src)<<std::endl;
RealD nrm = norm2(src);
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
@ -46,11 +47,11 @@ int main (int argc, char ** argv)
}
RealD mass=0.5;
WilsonMatrix Dw(Umu,mass);
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
CG(HermOp,src,result);
Grid_finalize();
}

View File

@ -0,0 +1,201 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
int threads = GridThread::GetThreads();
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
// std::vector<int> seeds({1,2,3,4});
// pRNG.SeedFixedIntegers(seeds);
pRNG.SeedRandomDevice();
LatticeFermion src (&Grid); random(pRNG,src);
LatticeFermion phi (&Grid); random(pRNG,phi);
LatticeFermion chi (&Grid); random(pRNG,chi);
LatticeFermion result(&Grid); result=zero;
LatticeFermion ref(&Grid); ref=zero;
LatticeFermion tmp(&Grid); tmp=zero;
LatticeFermion err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
// Only one non-zero (y)
Umu=zero;
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
RealD mass=0.1;
WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
LatticeFermion src_e (&RBGrid);
LatticeFermion src_o (&RBGrid);
LatticeFermion r_e (&RBGrid);
LatticeFermion r_o (&RBGrid);
LatticeFermion r_eo (&Grid);
const int Even=0;
const int Odd=1;
std::cout<<"=========================================================="<<std::endl;
std::cout<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
std::cout<<"=========================================================="<<std::endl;
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
Dw.Meooe(src_e,r_o); std::cout<<"Applied Meo"<<std::endl;
Dw.Meooe(src_o,r_e); std::cout<<"Applied Moe"<<std::endl;
Dw.Dhop (src,ref,0);
setCheckerboard(r_eo,r_o);
setCheckerboard(r_eo,r_e);
ref = (-0.5)*ref;
err= ref - r_eo;
std::cout << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
LatticeComplex cerr(&Grid);
cerr = localInnerProduct(err,err);
std::cout<<"=============================================================="<<std::endl;
std::cout<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
std::cout<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
std::cout<<"=============================================================="<<std::endl;
LatticeFermion chi_e (&RBGrid);
LatticeFermion chi_o (&RBGrid);
LatticeFermion dchi_e (&RBGrid);
LatticeFermion dchi_o (&RBGrid);
LatticeFermion phi_e (&RBGrid);
LatticeFermion phi_o (&RBGrid);
LatticeFermion dphi_e (&RBGrid);
LatticeFermion dphi_o (&RBGrid);
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
Dw.Meooe(chi_e,dchi_o);
Dw.Meooe(chi_o,dchi_e);
Dw.MeooeDag(phi_e,dphi_o);
Dw.MeooeDag(phi_o,dphi_e);
ComplexD pDce = innerProduct(phi_e,dchi_e);
ComplexD pDco = innerProduct(phi_o,dchi_o);
ComplexD cDpe = innerProduct(chi_e,dphi_e);
ComplexD cDpo = innerProduct(chi_o,dphi_o);
std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
std::cout <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
std::cout<<"=============================================================="<<std::endl;
std::cout<<"= Test MeeInv Mee = 1 "<<std::endl;
std::cout<<"=============================================================="<<std::endl;
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
Dw.Mooee(chi_e,src_e);
Dw.MooeeInv(src_e,phi_e);
Dw.Mooee(chi_o,src_o);
Dw.MooeeInv(src_o,phi_o);
setCheckerboard(phi,phi_e);
setCheckerboard(phi,phi_o);
err = phi-chi;
std::cout << "norm diff "<< norm2(err)<< std::endl;
std::cout<<"=============================================================="<<std::endl;
std::cout<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
std::cout<<"=============================================================="<<std::endl;
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
Dw.MooeeDag(chi_e,src_e);
Dw.MooeeInvDag(src_e,phi_e);
Dw.MooeeDag(chi_o,src_o);
Dw.MooeeInvDag(src_o,phi_o);
setCheckerboard(phi,phi_e);
setCheckerboard(phi,phi_o);
err = phi-chi;
std::cout << "norm diff "<< norm2(err)<< std::endl;
std::cout<<"=============================================================="<<std::endl;
std::cout<<"= Test MpcDagMpc is Hermitian "<<std::endl;
std::cout<<"=============================================================="<<std::endl;
random(pRNG,phi);
random(pRNG,chi);
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
Dw.MpcDagMpc(chi_e,dchi_e,t1,t2);
Dw.MpcDagMpc(chi_o,dchi_o,t1,t2);
Dw.MpcDagMpc(phi_e,dphi_e,t1,t2);
Dw.MpcDagMpc(phi_o,dphi_o,t1,t2);
pDce = innerProduct(phi_e,dchi_e);
pDco = innerProduct(phi_o,dchi_o);
cDpe = innerProduct(chi_e,dphi_e);
cDpo = innerProduct(chi_o,dphi_o);
std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
std::cout <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
std::cout <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
Grid_finalize();
}

View File

@ -5,18 +5,27 @@ AM_LDFLAGS = -L$(top_builddir)/lib
#
# Test code
#
bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec
bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec Grid_wilson_evenodd Grid_wilson_cg_prec Grid_wilson_cg_schur
Grid_wilson_SOURCES = Grid_wilson.cc
Grid_wilson_LDADD = -lGrid
Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
Grid_wilson_evenodd_LDADD = -lGrid
Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
Grid_wilson_cg_unprec_LDADD = -lGrid
Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc
Grid_wilson_cg_prec_LDADD = -lGrid
Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc
Grid_wilson_cg_schur_LDADD = -lGrid
Grid_comms_SOURCES = Grid_comms.cc
Grid_comms_LDADD = -lGrid
Grid_su3_SOURCES = Grid_su3.cc
Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc
Grid_su3_LDADD = -lGrid
Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc

View File

@ -11,6 +11,7 @@
#include <sys/time.h>
#include <signal.h>
#include <iostream>
#include <iterator>
#include <Grid.h>
#include <algorithm>
#include <iterator>

View File

@ -13,28 +13,6 @@
typedef uint32_t Integer;
#ifdef SSE4
#include <pmmintrin.h>
#endif
#if defined(AVX1) || defined (AVX2)
#include <immintrin.h>
// _mm256_set_m128i(hi,lo); // not defined in all versions of immintrin.h
#ifndef _mm256_set_m128i
#define _mm256_set_m128i(hi,lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo),(hi),1)
#endif
#endif
#ifdef AVX512
#include <immintrin.h>
#ifndef KNC_ONLY_STORES
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
#endif
#endif
namespace Grid {
typedef float RealF;
@ -117,7 +95,7 @@ namespace Grid {
template<> inline void zeroit(RealF &arg){ arg=0; };
template<> inline void zeroit(RealD &arg){ arg=0; };
// Eventually delete this part
#if defined (SSE4)
typedef __m128 fvec;
typedef __m128d dvec;
@ -146,6 +124,7 @@ namespace Grid {
typedef vector4double dvec;
typedef vector4double zvec;
#endif
#if defined (AVX1) || defined (AVX2) || defined (AVX512)
inline void v_prefetch0(int size, const char *ptr){
for(int i=0;i<size;i+=64){ // Define L1 linesize above// What about SSE?

View File

@ -120,8 +120,8 @@ namespace Grid {
// Map to always positive shift modulo global full dimension.
int shift = (displacement+fd)%fd;
int checkerboard = _grid->CheckerBoardDestination(source.checkerboard,shift);
assert (checkerboard== _checkerboard);
// int checkerboard = _grid->CheckerBoardDestination(source.checkerboard,shift);
assert (source.checkerboard== _checkerboard);
// the permute type
int simd_layout = _grid->_simd_layout[dimension];

View File

@ -5,6 +5,8 @@
#define GRID_OMP
#endif
#define UNROLL _Pragma("unroll")
#ifdef GRID_OMP
#include <omp.h>
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for")

View File

@ -25,7 +25,7 @@ namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
public:
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2);
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2)=0;
void AdjOp(const Field &in, Field &out) {
Op(in,out);
};
@ -106,6 +106,10 @@ namespace Grid {
public:
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
};
template<class Field> class HermitianOperatorFunction {
public:
virtual void operator() (HermitianOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
};
// FIXME : To think about

View File

@ -10,6 +10,7 @@ namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SparseMatrixBase {
public:
GridBase *_grid;
// Full checkerboar operations
virtual RealD M (const Field &in, Field &out)=0;
virtual RealD Mdag (const Field &in, Field &out)=0;
@ -18,6 +19,7 @@ namespace Grid {
ni=M(in,tmp);
no=Mdag(tmp,out);
}
SparseMatrixBase(GridBase *grid) : _grid(grid) {};
};
/////////////////////////////////////////////////////////////////////////////////////////////
@ -25,7 +27,7 @@ namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
public:
GridBase *_cbgrid;
// half checkerboard operaions
virtual void Meooe (const Field &in, Field &out)=0;
virtual void Mooee (const Field &in, Field &out)=0;
@ -44,9 +46,7 @@ namespace Grid {
Meooe(out,tmp);
Mooee(in,out);
out=out-tmp; // axpy_norm
RealD n=norm2(out);
return n;
return axpy_norm(out,-1.0,tmp,out);
}
virtual RealD MpcDag (const Field &in, Field &out){
Field tmp(in._grid);
@ -56,15 +56,15 @@ namespace Grid {
MeooeDag(out,tmp);
MooeeDag(in,out);
out=out-tmp; // axpy_norm
RealD n=norm2(out);
return n;
return axpy_norm(out,-1.0,tmp,out);
}
virtual void MpcDagMpc(const Field &in, Field &out,RealD ni,RealD no) {
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
Field tmp(in._grid);
ni=Mpc(in,tmp);
no=Mpc(tmp,out);
no=MpcDag(tmp,out);
// std::cout<<"MpcDagMpc "<<ni<<" "<<no<<std::endl;
}
CheckerBoardedSparseMatrixBase(GridBase *grid,GridBase *cbgrid) : SparseMatrixBase<Field>(grid), _cbgrid(cbgrid) {};
};
}

View File

@ -31,7 +31,7 @@ public:
bigfloat(const double d) { mpf_init_set_d(x, d); }
bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
~bigfloat(void) { mpf_clear(x); }
operator const double (void) const { return (double)mpf_get_d(x); }
operator double (void) const { return (double)mpf_get_d(x); }
static void setDefaultPrecision(unsigned long dprec) {
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
mpf_set_default_prec(bprec);

View File

@ -9,18 +9,21 @@ namespace Grid {
/////////////////////////////////////////////////////////////
template<class Field>
class ConjugateGradient : public OperatorFunction<Field> {
class ConjugateGradient : public HermitianOperatorFunction<Field> {
public:
RealD Tolerance;
Integer MaxIterations;
int verbose;
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
std::cout << Tolerance<<std::endl;
verbose=0;
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi) {assert(0);};
void operator() (HermitianOperatorBase<Field> &Linop,const Field &src, Field &psi){
psi.checkerboard = src.checkerboard;
conformable(psi,src);
RealD cp,c,a,d,b,ssq,qq,b_pred;
Field p(src);
@ -38,14 +41,16 @@ public:
a =norm2(p);
cp =a;
ssq=norm2(src);
std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: r "<<cp <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
if ( verbose ) {
std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
}
RealD rsq = Tolerance* Tolerance*ssq;
//Check if guess is really REALLY good :)
@ -61,14 +66,16 @@ public:
c=cp;
Linop.OpAndNorm(p,mmp,d,qq);
// std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<std::endl;
RealD qqck = norm2(mmp);
ComplexD dck = innerProduct(p,mmp);
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<" qqcheck "<< qqck<< " dck "<< dck<<std::endl;
a = c/d;
b_pred = a*(a*qq-d)/c;
// std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c;
// std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,b "<<cp<< " "<<b <<std::endl;
@ -77,7 +84,16 @@ public:
psi= a*p+psi;
p = p*b+r;
std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
if (verbose) std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
// Hack
if (0) {
Field tt(src);
Linop.Op(psi,mmp);
tt=mmp-src;
RealD resnorm = norm2(tt);
std::cout<<"ConjugateGradient: Iteration " <<k<<" true residual "<<resnorm << " computed " << cp <<std::endl;
}
// Stopping condition
if ( cp <= rsq ) {

View File

@ -1,6 +1,7 @@
#ifndef GRID_SCHUR_RED_BLACK_H
#define GRID_SCHUR_RED_BLACK_H
/*
* Red black Schur decomposition
*
@ -25,80 +26,86 @@
* M psi = eta
***********************
*Odd
* i) (D_oo)^{\dag} D_oo psi_o = (D_oo)^\dag L^{-1} eta_o
* eta_o' = D_oo (eta_o - Moe Mee^{-1} eta_e)
* i) (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
*Even
* ii) Mee psi_e + Meo psi_o = src_e
*
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
*
*/
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackSolve : public OperatorFunction<Field>{
template<class Field> class SchurRedBlackSolve {
private:
SparseMatrixBase<Field> & _Matrix;
OperatorFunction<Field> & _HermitianRBSolver;
HermitianOperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackSolve(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianRBSolver)
: _Matrix(Matrix), _HermitianRBSolver(HermitianRBSolver) {
SchurRedBlackSolve(HermitianOperatorFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
void operator() (const Field &in, Field &out){
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME need to make eo grid from full grid.
// FIXME use CBfactorise to control schur decomp
const int Even=0;
const int Odd =1;
// Make a cartesianRedBlack from full Grid
GridRedBlackCartesian grid(in._grid);
GridBase *grid = _Matrix._cbgrid;
GridBase *fgrid= _Matrix._grid;
Field src_e(&grid);
Field src_o(&grid);
Field sol_e(&grid);
Field sol_o(&grid);
Field tmp(&grid);
Field Mtmp(&grid);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); // MooeeInv(source[Even],tmp,DaggerNo,Even);
_Matrix.Meooe (tmp,Mtmp); // Meo (tmp,src,Odd,DaggerNo);
tmp=src_o-Mtmp; // axpy (tmp,src,source[Odd],-1.0);
_Matrix.MpcDag(tmp,src_o); // Mprec(tmp,src,Mtmp,DaggerYes);
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
_Matrix.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
_HermitianRBSolver(src_o,sol_o); // CGNE_prec_MdagM(solution[Odd],src);
HermitianCheckerBoardedOperator<Matrix,Field> _HermOpEO(_Matrix);
std::cout << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); // Meo(solution[Odd],tmp,Even,DaggerNo);
src_e = src_e-tmp; // axpy(src,tmp,source[Even],-1.0);
_Matrix.MooeeInv(src_e,sol_e); // MooeeInv(src,solution[Even],DaggerNo,Even);
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e);
setCheckerboard(out,sol_o);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout << "SchurRedBlack solver true unprec resid "<< sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};

View File

@ -4,6 +4,13 @@
namespace Grid {
static const int CbRed =0;
static const int CbBlack=1;
static const int Even =CbRed;
static const int Odd =CbBlack;
static const int DaggerNo=0;
static const int DaggerYes=1;
// Specialise this for red black grids storing half the data like a chess board.
class GridRedBlackCartesian : public GridBase
{
@ -44,6 +51,9 @@ public:
return source_cb;
}
};
GridRedBlackCartesian(GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {};
GridRedBlackCartesian(std::vector<int> &dimensions,
std::vector<int> &simd_layout,
std::vector<int> &processor_grid ) : GridBase(processor_grid)

View File

@ -91,6 +91,47 @@ inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<
GridFromExpression(grid,std::get<2>(expr.second));
}
//////////////////////////////////////////////////////////////////////////
// Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion
//////////////////////////////////////////////////////////////////////////
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
{
if ( (cb==Odd) || (cb==Even) ) {
assert(cb==lat.checkerboard);
}
cb=lat.checkerboard;
// std::cout<<"Lattice leaf cb "<<cb<<std::endl;
}
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
{
// std::cout<<"Non lattice leaf cb"<<cb<<std::endl;
}
template <typename Op, typename T1>
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
{
CBFromExpression(cb,std::get<0>(expr.second));// recurse
// std::cout<<"Unary node cb "<<cb<<std::endl;
}
template <typename Op, typename T1, typename T2>
inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr)
{
CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb,std::get<1>(expr.second));
// std::cout<<"Binary node cb "<<cb<<std::endl;
}
template <typename Op, typename T1, typename T2, typename T3>
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
{
CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb,std::get<1>(expr.second));
CBFromExpression(cb,std::get<2>(expr.second));
// std::cout<<"Trinary node cb "<<cb<<std::endl;
}
////////////////////////////////////////////
// Unary operators and funcs
////////////////////////////////////////////

View File

@ -9,45 +9,68 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class obj1,class obj2,class obj3> strong_inline
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
// mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
#else
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
#endif
}
}
template<class obj1,class obj2,class obj3> strong_inline
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
#else
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
#endif
}
}
template<class obj1,class obj2,class obj3> strong_inline
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
#else
sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
#endif
}
}
template<class obj1,class obj2,class obj3> strong_inline
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
#else
add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
#endif
}
}
@ -56,6 +79,7 @@ PARALLEL_FOR_LOOP
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class obj1,class obj2,class obj3> strong_inline
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(lhs,ret);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
@ -67,7 +91,8 @@ PARALLEL_FOR_LOOP
template<class obj1,class obj2,class obj3> strong_inline
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
conformable(lhs,ret);
ret.checkerboard = lhs.checkerboard;
conformable(ret,lhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
obj1 tmp;
@ -78,22 +103,32 @@ PARALLEL_FOR_LOOP
template<class obj1,class obj2,class obj3> strong_inline
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
conformable(lhs,ret);
ret.checkerboard = lhs.checkerboard;
conformable(ret,lhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs._odata[ss],&rhs);
vstream(ret._odata[ss],tmp);
#else
sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
#endif
}
}
template<class obj1,class obj2,class obj3> strong_inline
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(lhs,ret);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs._odata[ss],&rhs);
vstream(ret._odata[ss],tmp);
#else
add(&ret._odata[ss],&lhs._odata[ss],&rhs);
#endif
}
}
@ -102,84 +137,112 @@ PARALLEL_FOR_LOOP
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class obj1,class obj2,class obj3> strong_inline
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mult(&tmp,&lhs,&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
#else
mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
#endif
}
}
template<class obj1,class obj2,class obj3> strong_inline
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mac(&tmp,&lhs,&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
#else
mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
#endif
}
}
template<class obj1,class obj2,class obj3> strong_inline
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs,&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
#else
sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
#endif
}
}
template<class obj1,class obj2,class obj3> strong_inline
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs,&rhs._odata[ss]);
vstream(ret._odata[ss],tmp);
#else
add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
#endif
}
}
template<class sobj,class vobj> strong_inline
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
#pragma omp parallel for
PARALLEL_FOR_LOOP
for(int ss=0;ss<x._grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
#else
ret._odata[ss]=a*x._odata[ss]+y._odata[ss];
#endif
}
}
template<class sobj,class vobj> strong_inline
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
#pragma omp parallel for
PARALLEL_FOR_LOOP
for(int ss=0;ss<x._grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);
#else
ret._odata[ss]=a*x._odata[ss]+b*y._odata[ss];
#endif
}
}
template<class sobj,class vobj> strong_inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
}
axpy(ret,a,x,y);
return norm2(ret);
}
template<class sobj,class vobj> strong_inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);
}
axpby(ret,a,b,x,y);
return norm2(ret); // FIXME implement parallel norm in ss loop
}

View File

@ -1,6 +1,8 @@
#ifndef GRID_LATTICE_BASE_H
#define GRID_LATTICE_BASE_H
#define STREAMING_STORES
namespace Grid {
// TODO:
@ -64,60 +66,136 @@ public:
////////////////////////////////////////////////////////////////////////////////
template <typename Op, typename T1> strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
{
GridBase *egrid(nullptr);
GridFromExpression(egrid,expr);
assert(egrid!=nullptr);
conformable(_grid,egrid);
int cb=-1;
CBFromExpression(cb,expr);
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
vobj tmp= eval(ss,expr);
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
#else
_odata[ss]=eval(ss,expr);
#endif
}
return *this;
}
template <typename Op, typename T1,typename T2> strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
{
GridBase *egrid(nullptr);
GridFromExpression(egrid,expr);
assert(egrid!=nullptr);
conformable(_grid,egrid);
int cb=-1;
CBFromExpression(cb,expr);
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
vobj tmp= eval(ss,expr);
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
#else
_odata[ss]=eval(ss,expr);
#endif
}
return *this;
}
template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
{
GridBase *egrid(nullptr);
GridFromExpression(egrid,expr);
assert(egrid!=nullptr);
conformable(_grid,egrid);
int cb=-1;
CBFromExpression(cb,expr);
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
vobj tmp= eval(ss,expr);
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
#else
_odata[ss] = eval(ss,expr);
#endif
}
return *this;
}
//GridFromExpression is tricky to do
template<class Op,class T1>
Lattice(const LatticeUnaryExpression<Op,T1> & expr): _grid(nullptr){
GridFromExpression(_grid,expr);
assert(_grid!=nullptr);
int cb=-1;
CBFromExpression(cb,expr);
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss] = eval(ss,expr);
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
#else
_odata[ss]=_eval(ss,expr);
#endif
}
};
template<class Op,class T1, class T2>
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr): _grid(nullptr){
GridFromExpression(_grid,expr);
assert(_grid!=nullptr);
int cb=-1;
CBFromExpression(cb,expr);
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss] = eval(ss,expr);
#ifdef STREAMING_STORES
vobj tmp = eval(tmp,ss,expr);
vstream(_odata[ss] ,tmp);
#else
_odata[ss]=eval(ss,expr);
#endif
}
};
template<class Op,class T1, class T2, class T3>
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr): _grid(nullptr){
GridFromExpression(_grid,expr);
assert(_grid!=nullptr);
int cb=-1;
CBFromExpression(cb,expr);
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss] = eval(ss,expr);
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
#else
_odata[ss]=eval(ss,expr);
#endif
}
};
@ -140,6 +218,7 @@ PARALLEL_FOR_LOOP
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
std::cout<<"Lattice operator ="<<std::endl;
PARALLEL_FOR_LOOP

View File

@ -16,19 +16,18 @@ strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> *
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){
int c3=0;
for(int c2=0;c2<N;c2++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
}
}
for(int c3=1;c3<N;c3++){
for(int c1=0;c1<N;c1++){
for(int c1=0;c1<N;c1++){
for(int c3=1;c3<N;c3++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}
}
return;
return;
}
template<class rrtype,class ltype,class rtype,int N>

View File

@ -34,66 +34,64 @@ public:
// Scalar no action
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
iScalar()=default;
iScalar() = default;
/*
iScalar(const iScalar<vtype> &copyme)=default;
iScalar(iScalar<vtype> &&copyme)=default;
iScalar<vtype> & operator= (const iScalar<vtype> &copyme) = default;
iScalar<vtype> & operator= (iScalar<vtype> &&copyme) = default;
*/
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
iScalar(const Zero &z){ *this = zero; };
iScalar<vtype> & operator= (const Zero &hero){
zeroit(*this);
return *this;
}
friend strong_inline void vstream(iScalar<vtype> &out,const iScalar<vtype> &in){
vstream(out._internal,in._internal);
}
friend strong_inline void zeroit(iScalar<vtype> &that){
zeroit(that._internal);
}
friend strong_inline void prefetch(iScalar<vtype> &that){
prefetch(that._internal);
}
friend strong_inline void permute(iScalar<vtype> &out,const iScalar<vtype> &in,int permutetype){
permute(out._internal,in._internal,permutetype);
}
iScalar<vtype> & operator= (const Zero &hero){
zeroit(*this);
return *this;
}
friend strong_inline void vstream(iScalar<vtype> &out,const iScalar<vtype> &in){
vstream(out._internal,in._internal);
}
friend strong_inline void zeroit(iScalar<vtype> &that){
zeroit(that._internal);
}
friend strong_inline void prefetch(iScalar<vtype> &that){
prefetch(that._internal);
}
friend strong_inline void permute(iScalar<vtype> &out,const iScalar<vtype> &in,int permutetype){
permute(out._internal,in._internal,permutetype);
}
// Unary negation
friend strong_inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
iScalar<vtype> ret;
ret._internal= -r._internal;
return ret;
}
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
strong_inline iScalar<vtype> &operator *=(const iScalar<vtype> &r) {
*this = (*this)*r;
return *this;
}
strong_inline iScalar<vtype> &operator -=(const iScalar<vtype> &r) {
*this = (*this)-r;
return *this;
}
strong_inline iScalar<vtype> &operator +=(const iScalar<vtype> &r) {
*this = (*this)+r;
return *this;
}
strong_inline vtype & operator ()(void) {
return _internal;
}
strong_inline const vtype & operator ()(void) const {
return _internal;
}
operator ComplexD () const { return(TensorRemove(_internal)); };
operator RealD () const { return(real(TensorRemove(_internal))); }
// convert from a something to a scalar
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iScalar<vtype>
// Unary negation
friend strong_inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
iScalar<vtype> ret;
ret._internal= -r._internal;
return ret;
}
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
strong_inline iScalar<vtype> &operator *=(const iScalar<vtype> &r) {
*this = (*this)*r;
return *this;
}
strong_inline iScalar<vtype> &operator -=(const iScalar<vtype> &r) {
*this = (*this)-r;
return *this;
}
strong_inline iScalar<vtype> &operator +=(const iScalar<vtype> &r) {
*this = (*this)+r;
return *this;
}
strong_inline vtype & operator ()(void) {
return _internal;
}
strong_inline const vtype & operator ()(void) const {
return _internal;
}
operator ComplexD () const { return(TensorRemove(_internal)); };
operator RealD () const { return(real(TensorRemove(_internal))); }
// convert from a something to a scalar
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iScalar<vtype>
{
_internal = vtype(arg);
return *this;
@ -129,67 +127,72 @@ public:
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
iVector(const Zero &z){ *this = zero; };
iVector() =default;
/*
iVector(const iVector<vtype,N> &copyme)=default;
iVector(iVector<vtype,N> &&copyme)=default;
iVector<vtype,N> & operator= (const iVector<vtype,N> &copyme) = default;
iVector<vtype,N> & operator= (iVector<vtype,N> &&copyme) = default;
*/
iVector<vtype,N> & operator= (const Zero &hero){
zeroit(*this);
return *this;
iVector<vtype,N> & operator= (const Zero &hero){
zeroit(*this);
return *this;
}
friend strong_inline void zeroit(iVector<vtype,N> &that){
for(int i=0;i<N;i++){
zeroit(that._internal[i]);
}
friend strong_inline void zeroit(iVector<vtype,N> &that){
for(int i=0;i<N;i++){
zeroit(that._internal[i]);
}
}
friend strong_inline void prefetch(iVector<vtype,N> &that){
for(int i=0;i<N;i++) prefetch(that._internal[i]);
}
friend strong_inline void vstream(iVector<vtype,N> &out,const iVector<vtype,N> &in){
for(int i=0;i<N;i++){
vstream(out._internal[i],in._internal[i]);
}
friend strong_inline void prefetch(iVector<vtype,N> &that){
for(int i=0;i<N;i++) prefetch(that._internal[i]);
}
friend strong_inline void permute(iVector<vtype,N> &out,const iVector<vtype,N> &in,int permutetype){
for(int i=0;i<N;i++){
permute(out._internal[i],in._internal[i],permutetype);
}
friend strong_inline void vstream(iVector<vtype,N> &out,const iVector<vtype,N> &in){
for(int i=0;i<N;i++){
vstream(out._internal[i],in._internal[i]);
}
}
// Unary negation
friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
iVector<vtype,N> ret;
for(int i=0;i<N;i++) ret._internal[i]= -r._internal[i];
return ret;
}
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
strong_inline iVector<vtype,N> &operator *=(const iScalar<vtype> &r) {
*this = (*this)*r;
return *this;
}
strong_inline iVector<vtype,N> &operator -=(const iVector<vtype,N> &r) {
*this = (*this)-r;
return *this;
}
strong_inline iVector<vtype,N> &operator +=(const iVector<vtype,N> &r) {
*this = (*this)+r;
return *this;
}
strong_inline vtype & operator ()(int i) {
return _internal[i];
}
strong_inline const vtype & operator ()(int i) const {
return _internal[i];
}
friend std::ostream& operator<< (std::ostream& stream, const iVector<vtype,N> &o){
stream<< "V<"<<N<<">{";
for(int i=0;i<N;i++) {
stream<<o._internal[i];
if (i<N-1) stream<<",";
}
friend strong_inline void permute(iVector<vtype,N> &out,const iVector<vtype,N> &in,int permutetype){
for(int i=0;i<N;i++){
permute(out._internal[i],in._internal[i],permutetype);
}
}
// Unary negation
friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
iVector<vtype,N> ret;
for(int i=0;i<N;i++) ret._internal[i]= -r._internal[i];
return ret;
}
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
strong_inline iVector<vtype,N> &operator *=(const iScalar<vtype> &r) {
*this = (*this)*r;
return *this;
}
strong_inline iVector<vtype,N> &operator -=(const iVector<vtype,N> &r) {
*this = (*this)-r;
return *this;
}
strong_inline iVector<vtype,N> &operator +=(const iVector<vtype,N> &r) {
*this = (*this)+r;
return *this;
}
strong_inline vtype & operator ()(int i) {
return _internal[i];
}
strong_inline const vtype & operator ()(int i) const {
return _internal[i];
}
friend std::ostream& operator<< (std::ostream& stream, const iVector<vtype,N> &o){
stream<< "V<"<<N<<">{";
for(int i=0;i<N;i++) {
stream<<o._internal[i];
if (i<N-1) stream<<",";
}
stream<<"}";
return stream;
};
// strong_inline vtype && operator ()(int i) {
// return _internal[i];
// }
stream<<"}";
return stream;
};
// strong_inline vtype && operator ()(int i) {
// return _internal[i];
// }
};
template<class vtype,int N> class iMatrix
@ -209,8 +212,6 @@ public:
iMatrix(const Zero &z){ *this = zero; };
iMatrix() =default;
// No copy constructor...
iMatrix& operator=(const iMatrix& rhs){
for(int i=0;i<N;i++)
@ -221,6 +222,17 @@ public:
iMatrix(scalar_type s) { (*this) = s ;};// recurse down and hit the constructor for vector_type
/*
iMatrix(const iMatrix<vtype,N> &copyme)=default;
iMatrix(iMatrix<vtype,N> &&copyme)=default;
iMatrix<vtype,N> & operator= (const iMatrix<vtype,N> &copyme) = default;
iMatrix<vtype,N> & operator= (iMatrix<vtype,N> &&copyme) = default;
*/
iMatrix<vtype,N> & operator= (const Zero &hero){
zeroit(*this);
return *this;

View File

@ -10,9 +10,6 @@ namespace QCD {
static const int Nhs=2; // half spinor
static const int Nds=8; // double stored gauge field
static const int CbRed =0;
static const int CbBlack=1;
//////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour

View File

@ -4,8 +4,8 @@
namespace Grid {
namespace QCD {
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0});
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0});
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3});
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1});
// Should be in header?
const int WilsonMatrix::Xp = 0;
@ -16,7 +16,6 @@ const int WilsonMatrix::Xm = 4;
const int WilsonMatrix::Ym = 5;
const int WilsonMatrix::Zm = 6;
const int WilsonMatrix::Tm = 7;
//const int WilsonMatrix::X0 = 8;
class WilsonCompressor {
public:
@ -72,20 +71,27 @@ const int WilsonMatrix::Tm = 7;
}
};
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,double _mass)
: Stencil(Umu._grid,npoint,0,directions,displacements),
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid, double _mass) :
CheckerBoardedSparseMatrixBase<LatticeFermion>(&Fgrid,&Hgrid),
Stencil ( _grid,npoint,Even,directions,displacements),
StencilEven(_cbgrid,npoint,Even,directions,displacements), // source is Even
StencilOdd (_cbgrid,npoint,Odd ,directions,displacements), // source is Odd
mass(_mass),
Umu(_Umu._grid)
{
// Allocate the required comms buffer
grid = _Umu._grid;
comm_buf.resize(Stencil._unified_buffer_size);
DoubleStore(Umu,_Umu);
}
Umu(_grid),
UmuEven(_cbgrid),
UmuOdd (_cbgrid)
{
// Allocate the required comms buffer
comm_buf.resize(Stencil._unified_buffer_size); // this is always big enough to contain EO
DoubleStore(Umu,_Umu);
pickCheckerboard(Even,UmuEven,Umu);
pickCheckerboard(Odd ,UmuOdd,Umu);
}
void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
{
LatticeColourMatrix U(grid);
LatticeColourMatrix U(_grid);
for(int mu=0;mu<Nd;mu++){
U = peekIndex<LorentzIndex>(Umu,mu);
@ -97,48 +103,63 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out,0);
out.checkerboard=in.checkerboard;
Dhop(in,out,DaggerNo);
out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
return norm2(out);
}
RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out,1);
out.checkerboard=in.checkerboard;
Dhop(in,out,DaggerYes);
out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
return norm2(out);
}
void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out,0);
out = 0.5*out; // FIXME : scale factor in Dhop
if ( in.checkerboard == Odd ) {
DhopEO(in,out,DaggerNo);
} else {
DhopOE(in,out,DaggerNo);
}
out = (-0.5)*out; // FIXME : scale factor in Dhop
}
void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out,1);
if ( in.checkerboard == Odd ) {
DhopEO(in,out,DaggerYes);
} else {
DhopOE(in,out,DaggerYes);
}
out = (-0.5)*out; // FIXME : scale factor in Dhop
}
void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out)
{
out.checkerboard = in.checkerboard;
out = (4.0+mass)*in;
return ;
}
void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
{
out = (1.0/(4.0+mass))*in;
return ;
}
void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
{
out.checkerboard = in.checkerboard;
Mooee(in,out);
}
void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
{
out.checkerboard = in.checkerboard;
out = (1.0/(4.0+mass))*in;
return ;
}
void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
{
out = (1.0/(4.0+mass))*in;
return ;
out.checkerboard = in.checkerboard;
MooeeInv(in,out);
}
void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out)
void WilsonMatrix::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
int ss,const LatticeFermion &in, LatticeFermion &out)
{
vHalfSpinColourVector tmp;
vHalfSpinColourVector chi;
@ -146,79 +167,78 @@ void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out
vHalfSpinColourVector Uchi;
int offset,local,perm, ptype;
// int ss = Stencil._LebesgueReorder[sss];
int ssu= ss;
//#define VERBOSE( A) if ( ss<10 ) { std::cout << "site " <<ss << " " #A " neigh " << offset << " perm "<< perm <<std::endl;}
// Xp
offset = Stencil._offsets [Xp][ss];
local = Stencil._is_local[Xp][ss];
perm = Stencil._permute[Xp][ss];
offset = st._offsets [Xp][ss];
local = st._is_local[Xp][ss];
perm = st._permute[Xp][ss];
ptype = Stencil._permute_type[Xp];
ptype = st._permute_type[Xp];
if ( local && perm ) {
spProjXp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
mult(&Uchi(),&U._odata[ss](Xp),&chi());
spReconXp(result,Uchi);
// Yp
offset = Stencil._offsets [Yp][ss];
local = Stencil._is_local[Yp][ss];
perm = Stencil._permute[Yp][ss];
ptype = Stencil._permute_type[Yp];
offset = st._offsets [Yp][ss];
local = st._is_local[Yp][ss];
perm = st._permute[Yp][ss];
ptype = st._permute_type[Yp];
if ( local && perm ) {
spProjYp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
mult(&Uchi(),&U._odata[ss](Yp),&chi());
accumReconYp(result,Uchi);
// Zp
offset = Stencil._offsets [Zp][ss];
local = Stencil._is_local[Zp][ss];
perm = Stencil._permute[Zp][ss];
ptype = Stencil._permute_type[Zp];
offset = st._offsets [Zp][ss];
local = st._is_local[Zp][ss];
perm = st._permute[Zp][ss];
ptype = st._permute_type[Zp];
if ( local && perm ) {
spProjZp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
mult(&Uchi(),&U._odata[ss](Zp),&chi());
accumReconZp(result,Uchi);
// Tp
offset = Stencil._offsets [Tp][ss];
local = Stencil._is_local[Tp][ss];
perm = Stencil._permute[Tp][ss];
ptype = Stencil._permute_type[Tp];
offset = st._offsets [Tp][ss];
local = st._is_local[Tp][ss];
perm = st._permute[Tp][ss];
ptype = st._permute_type[Tp];
if ( local && perm ) {
spProjTp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
mult(&Uchi(),&U._odata[ss](Tp),&chi());
accumReconTp(result,Uchi);
// Xm
offset = Stencil._offsets [Xm][ss];
local = Stencil._is_local[Xm][ss];
perm = Stencil._permute[Xm][ss];
ptype = Stencil._permute_type[Xm];
offset = st._offsets [Xm][ss];
local = st._is_local[Xm][ss];
perm = st._permute[Xm][ss];
ptype = st._permute_type[Xm];
if ( local && perm )
{
@ -227,17 +247,17 @@ void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out
} else if ( local ) {
spProjXm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
mult(&Uchi(),&U._odata[ss](Xm),&chi());
accumReconXm(result,Uchi);
// Ym
offset = Stencil._offsets [Ym][ss];
local = Stencil._is_local[Ym][ss];
perm = Stencil._permute[Ym][ss];
ptype = Stencil._permute_type[Ym];
offset = st._offsets [Ym][ss];
local = st._is_local[Ym][ss];
perm = st._permute[Ym][ss];
ptype = st._permute_type[Ym];
if ( local && perm ) {
spProjYm(tmp,in._odata[offset]);
@ -245,46 +265,49 @@ void WilsonMatrix::DhopSite(int ss,const LatticeFermion &in, LatticeFermion &out
} else if ( local ) {
spProjYm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
mult(&Uchi(),&U._odata[ss](Ym),&chi());
accumReconYm(result,Uchi);
// Zm
offset = Stencil._offsets [Zm][ss];
local = Stencil._is_local[Zm][ss];
perm = Stencil._permute[Zm][ss];
ptype = Stencil._permute_type[Zm];
offset = st._offsets [Zm][ss];
local = st._is_local[Zm][ss];
perm = st._permute[Zm][ss];
ptype = st._permute_type[Zm];
if ( local && perm ) {
spProjZm(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
mult(&Uchi(),&U._odata[ss](Zm),&chi());
accumReconZm(result,Uchi);
// Tm
offset = Stencil._offsets [Tm][ss];
local = Stencil._is_local[Tm][ss];
perm = Stencil._permute[Tm][ss];
ptype = Stencil._permute_type[Tm];
offset = st._offsets [Tm][ss];
local = st._is_local[Tm][ss];
perm = st._permute[Tm][ss];
ptype = st._permute_type[Tm];
if ( local && perm ) {
spProjTm(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
mult(&Uchi(),&U._odata[ss](Tm),&chi());
accumReconTm(result,Uchi);
vstream(out._odata[ss],result);
}
void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &out)
void WilsonMatrix::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
int ss,const LatticeFermion &in, LatticeFermion &out)
{
vHalfSpinColourVector tmp;
vHalfSpinColourVector chi;
@ -292,78 +315,76 @@ void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &
vHalfSpinColourVector Uchi;
int offset,local,perm, ptype;
int ssu= ss;
// Xp
offset = Stencil._offsets [Xm][ss];
local = Stencil._is_local[Xm][ss];
perm = Stencil._permute[Xm][ss];
offset = st._offsets [Xm][ss];
local = st._is_local[Xm][ss];
perm = st._permute[Xm][ss];
ptype = Stencil._permute_type[Xm];
ptype = st._permute_type[Xm];
if ( local && perm ) {
spProjXp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
mult(&Uchi(),&U._odata[ss](Xm),&chi());
spReconXp(result,Uchi);
// Yp
offset = Stencil._offsets [Ym][ss];
local = Stencil._is_local[Ym][ss];
perm = Stencil._permute[Ym][ss];
ptype = Stencil._permute_type[Ym];
offset = st._offsets [Ym][ss];
local = st._is_local[Ym][ss];
perm = st._permute[Ym][ss];
ptype = st._permute_type[Ym];
if ( local && perm ) {
spProjYp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
mult(&Uchi(),&U._odata[ss](Ym),&chi());
accumReconYp(result,Uchi);
// Zp
offset = Stencil._offsets [Zm][ss];
local = Stencil._is_local[Zm][ss];
perm = Stencil._permute[Zm][ss];
ptype = Stencil._permute_type[Zm];
offset = st._offsets [Zm][ss];
local = st._is_local[Zm][ss];
perm = st._permute[Zm][ss];
ptype = st._permute_type[Zm];
if ( local && perm ) {
spProjZp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
mult(&Uchi(),&U._odata[ss](Zm),&chi());
accumReconZp(result,Uchi);
// Tp
offset = Stencil._offsets [Tm][ss];
local = Stencil._is_local[Tm][ss];
perm = Stencil._permute[Tm][ss];
ptype = Stencil._permute_type[Tm];
offset = st._offsets [Tm][ss];
local = st._is_local[Tm][ss];
perm = st._permute[Tm][ss];
ptype = st._permute_type[Tm];
if ( local && perm ) {
spProjTp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTp(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
mult(&Uchi(),&U._odata[ss](Tm),&chi());
accumReconTp(result,Uchi);
// Xm
offset = Stencil._offsets [Xp][ss];
local = Stencil._is_local[Xp][ss];
perm = Stencil._permute[Xp][ss];
ptype = Stencil._permute_type[Xp];
offset = st._offsets [Xp][ss];
local = st._is_local[Xp][ss];
perm = st._permute[Xp][ss];
ptype = st._permute_type[Xp];
if ( local && perm )
{
@ -372,17 +393,16 @@ void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &
} else if ( local ) {
spProjXm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
mult(&Uchi(),&U._odata[ss](Xp),&chi());
accumReconXm(result,Uchi);
// Ym
offset = Stencil._offsets [Yp][ss];
local = Stencil._is_local[Yp][ss];
perm = Stencil._permute[Yp][ss];
ptype = Stencil._permute_type[Yp];
offset = st._offsets [Yp][ss];
local = st._is_local[Yp][ss];
perm = st._permute[Yp][ss];
ptype = st._permute_type[Yp];
if ( local && perm ) {
spProjYm(tmp,in._odata[offset]);
@ -390,69 +410,97 @@ void WilsonMatrix::DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &
} else if ( local ) {
spProjYm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
mult(&Uchi(),&U._odata[ss](Yp),&chi());
accumReconYm(result,Uchi);
// Zm
offset = Stencil._offsets [Zp][ss];
local = Stencil._is_local[Zp][ss];
perm = Stencil._permute[Zp][ss];
ptype = Stencil._permute_type[Zp];
offset = st._offsets [Zp][ss];
local = st._is_local[Zp][ss];
perm = st._permute[Zp][ss];
ptype = st._permute_type[Zp];
if ( local && perm ) {
spProjZm(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
mult(&Uchi(),&U._odata[ss](Zp),&chi());
accumReconZm(result,Uchi);
// Tm
offset = Stencil._offsets [Tp][ss];
local = Stencil._is_local[Tp][ss];
perm = Stencil._permute[Tp][ss];
ptype = Stencil._permute_type[Tp];
offset = st._offsets [Tp][ss];
local = st._is_local[Tp][ss];
perm = st._permute[Tp][ss];
ptype = st._permute_type[Tp];
if ( local && perm ) {
spProjTm(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTm(chi,in._odata[offset]);
} else {
chi=comm_buf[offset];
chi=buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
mult(&Uchi(),&U._odata[ss](Tp),&chi());
accumReconTm(result,Uchi);
vstream(out._odata[ss],result);
}
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
void WilsonMatrix::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U,
const LatticeFermion &in, LatticeFermion &out,int dag)
{
assert((dag==0) ||(dag==1));
assert((dag==DaggerNo) ||(dag==DaggerYes));
WilsonCompressor compressor(dag);
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
if ( dag ) {
st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
if ( dag == DaggerYes ) {
PARALLEL_FOR_LOOP
for(int sss=0;sss<grid->oSites();sss++){
DhopSiteDag(sss,in,out);
for(int sss=0;sss<in._grid->oSites();sss++){
DhopSiteDag(st,U,comm_buf,sss,in,out);
}
} else {
PARALLEL_FOR_LOOP
for(int sss=0;sss<grid->oSites();sss++){
DhopSite(sss,in,out);
for(int sss=0;sss<in._grid->oSites();sss++){
DhopSite(st,U,comm_buf,sss,in,out);
}
}
}
void WilsonMatrix::DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag)
{
conformable(in._grid,_cbgrid); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
DhopInternal(StencilEven,UmuOdd,in,out,dag);
}
void WilsonMatrix::DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag)
{
conformable(in._grid,_cbgrid); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
DhopInternal(StencilOdd,UmuEven,in,out,dag);
}
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
{
conformable(in._grid,_grid); // verifies full grid
conformable(in._grid,out._grid);
out.checkerboard = in.checkerboard;
DhopInternal(Stencil,Umu,in,out,dag);
}
}}

View File

@ -11,14 +11,20 @@ namespace Grid {
//NB r=1;
public:
double mass;
GridBase *grid;
// GridBase * grid; // Inherited
// GridBase * cbgrid;
// Copy of the gauge field
LatticeDoubledGaugeField Umu;
//Defines the stencils for even and odd
CartesianStencil Stencil;
CartesianStencil StencilEven;
CartesianStencil StencilOdd;
//Defines the stencil
CartesianStencil Stencil;
static const int npoint=9;
// Copy of the gauge field , with even and odd subsets
LatticeDoubledGaugeField Umu;
LatticeDoubledGaugeField UmuEven;
LatticeDoubledGaugeField UmuOdd;
static const int npoint=8;
static const std::vector<int> directions ;
static const std::vector<int> displacements;
static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;
@ -27,7 +33,7 @@ namespace Grid {
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;
// Constructor
WilsonMatrix(LatticeGaugeField &Umu,double mass);
WilsonMatrix(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,double _mass);
// DoubleStore
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
@ -45,9 +51,19 @@ namespace Grid {
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
// non-hermitian hopping term; half cb or both
void Dhop(const LatticeFermion &in, LatticeFermion &out,int dag);
void DhopSite (int ss,const LatticeFermion &in, LatticeFermion &out);
void DhopSiteDag(int ss,const LatticeFermion &in, LatticeFermion &out);
void Dhop (const LatticeFermion &in, LatticeFermion &out,int dag);
void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
void DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField &U,
const LatticeFermion &in, LatticeFermion &out,int dag);
// These ones will need to be package intelligently. WilsonType base class
// for use by DWF etc..
void DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
int ss,const LatticeFermion &in, LatticeFermion &out);
void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
int ss,const LatticeFermion &in, LatticeFermion &out);
typedef iScalar<iMatrix<vComplex, Nc> > matrix;

View File

@ -283,6 +283,7 @@ namespace Optimization {
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types
namespace Grid {
typedef __m128 SIMD_Ftype; // Single precision type
typedef __m128d SIMD_Dtype; // Double precision type
typedef __m128i SIMD_Itype; // Integer type

View File

@ -2,7 +2,7 @@
/*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types
*/
// Time-stamp: <2015-05-22 17:08:19 neo>
// Time-stamp: <2015-05-26 12:05:39 neo>
//---------------------------------------------------------------------------
#ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES
@ -21,31 +21,24 @@
namespace Grid {
// To take the floating point type of real/complex type
template <typename T>
struct RealPart {
typedef T type;
};
template <typename T>
struct RealPart< std::complex<T> >{
template <typename T> struct RealPart {
typedef T type;
};
template <typename T> struct RealPart< std::complex<T> >{
typedef T type;
};
// type alias used to simplify the syntax of std::enable_if
template <typename T> using Invoke =
typename T::type;
template <typename Condition, typename ReturnType> using EnableIf =
Invoke<std::enable_if<Condition::value, ReturnType>>;
template <typename Condition, typename ReturnType> using NotEnableIf =
Invoke<std::enable_if<!Condition::value, ReturnType>>;
template <typename T> using Invoke = typename T::type;
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType>>;
template <typename Condition, typename ReturnType> using NotEnableIf= Invoke<std::enable_if<!Condition::value, ReturnType>>;
////////////////////////////////////////////////////////
// Check for complexity with type traits
template <typename T>
struct is_complex : std::false_type {};
template < typename T >
struct is_complex< std::complex<T> >: std::true_type {};
template <typename T> struct is_complex : std::false_type {};
template < typename T > struct is_complex< std::complex<T> >: std::true_type {};
////////////////////////////////////////////////////////
// Define the operation templates functors
// general forms to allow for vsplat syntax
@ -102,8 +95,6 @@ namespace Grid {
Grid_simd(const Real a){
vsplat(*this,Scalar_type(a));
};
///////////////////////////////////////////////
// mac, mult, sub, add, adj
@ -145,10 +136,6 @@ namespace Grid {
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vfalse(Grid_simd &ret){vsplat(ret,0);}
////////////////////////////////////
// Arithmetic operator overloads +,-,*
@ -184,7 +171,6 @@ namespace Grid {
ret.v = binary<Vector_type>(a.v,b.v, MultSIMD());
return ret;
};
////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch

View File

@ -345,20 +345,30 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
// REDUCE FIXME must be a cleaner implementation
friend inline ComplexD Reduce(const vComplexD & in)
{
vComplexD v1,v2;
union {
zvec v;
double f[sizeof(zvec)/sizeof(double)];
} conv;
#ifdef SSE4
return ComplexD(in.v[0],in.v[1]);
v1=in;
#endif
#if defined(AVX1) || defined (AVX2)
vComplexD v1;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
return ComplexD(v1.v[0],v1.v[1]);
#endif
#ifdef AVX512
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
permute(v2,v1,1); // avx 256; quad complex single
v1=v1+v2;
#endif
#ifdef QPX
#error
#endif
conv.v = v1.v;
return ComplexD(conv.f[0],conv.f[1]);
}
// Unary negation

View File

@ -234,26 +234,34 @@ namespace Grid {
}
friend inline ComplexF Reduce(const vComplexF & in)
{
vComplexF v1,v2;
union {
cvec v;
float f[sizeof(cvec)/sizeof(float)];
} conv;
#ifdef SSE4
vComplexF v1;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
return ComplexF(v1.v[0],v1.v[1]);
#endif
#if defined(AVX1) || defined (AVX2)
vComplexF v1,v2;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
permute(v2,v1,1); // avx 256; quad complex single
v1=v1+v2;
return ComplexF(v1.v[0],v1.v[1]);
#endif
#ifdef AVX512
return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v));
permute(v1,in,0); // avx512 octo-complex single
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
#endif
#ifdef QPX
#error
#endif
conv.v = v1.v;
return ComplexF(conv.f[0],conv.f[1]);
}
friend inline vComplexF operator * (const ComplexF &a, vComplexF b){

View File

@ -210,25 +210,33 @@ namespace Grid {
friend inline RealD Reduce(const vRealD & in)
{
vRealD v1,v2;
union {
dvec v;
double f[sizeof(dvec)/sizeof(double)];
} conv;
#ifdef SSE4
vRealD v1;
permute(v1,in,0); // sse 128; paired real double
v1=v1+in;
return RealD(v1.v[0]);
#endif
#if defined(AVX1) || defined (AVX2)
vRealD v1,v2;
permute(v1,in,0); // avx 256; quad double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
return v1.v[0];
#endif
#ifdef AVX512
return _mm512_reduce_add_pd(in.v);
permute(v1,in,0); // avx 512; octo-double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
#endif
#ifdef QPX
#endif
conv.v=v1.v;
return conv.f[0];
}
// *=,+=,-= operators

View File

@ -243,29 +243,39 @@ friend inline void vstore(const vRealF &ret, float *a){
}
friend inline RealF Reduce(const vRealF & in)
{
#ifdef SSE4
vRealF v1,v2;
union {
fvec v;
float f[sizeof(fvec)/sizeof(double)];
} conv;
#ifdef SSE4
permute(v1,in,0); // sse 128; quad single
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
return v1.v[0];
#endif
#if defined(AVX1) || defined (AVX2)
vRealF v1,v2;
permute(v1,in,0); // avx 256; octo-double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
return v1.v[0];
#endif
#ifdef AVX512
return _mm512_reduce_add_ps(in.v);
permute(v1,in,0); // avx 256; octo-double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
permute(v2,v1,3);
v1=v1+v2;
#endif
#ifdef QPX
#endif
conv.v=v1.v;
return conv.f[0];
}
// *=,+=,-= operators

View File

@ -1,6 +1,6 @@
#!/bin/bash
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx clang-sse"
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx clang-sse icpc-avx-openmp-mpi icpc-avx-openmp"
for D in $DIRS
do

View File

@ -25,6 +25,12 @@ g++-avx)
icpc-avx)
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
;;
icpc-avx-openmp-mpi)
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
;;
icpc-avx-openmp)
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LIBS="-fopenmp -lgmp -lmpfr" --enable-comms=mpi
;;
icpc-avx2)
CXX=icpc ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
;;

View File

@ -1,8 +1,9 @@
#include "Grid.h"
//DEBUG
#ifdef SSE4
#include "simd/Grid_vector_types.h"
#endif
using namespace std;
using namespace Grid;

View File

@ -13,7 +13,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> latt_size ({16,16,16,32});
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> clatt_size ({4,4,4,8});
int orthodir=3;
int orthosz =latt_size[orthodir];
@ -44,13 +44,15 @@ int main (int argc, char ** argv)
// (1+2+3)=6 = N(N-1)/2 terms
LatticeComplex Plaq(&Fine);
LatticeComplex cPlaq(&Coarse);
Plaq = zero;
#if 1
for(int mu=1;mu<Nd;mu++){
for(int nu=0;nu<mu;nu++){
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
}
}
#endif
double vol = Fine.gSites();
Complex PlaqScale(1.0/vol/6.0/3.0);
std::cout <<"PlaqScale" << PlaqScale<<std::endl;

0
tests/InvSqrt.gnu Normal file
View File

2
tests/Sqrt.gnu Normal file
View File

@ -0,0 +1,2 @@
f(x) = 6.81384+(-2.34645e-06/(x+0.000228091))+(-1.51593e-05/(x+0.00112084))+(-6.89254e-05/(x+0.003496))+(-0.000288983/(x+0.00954309))+(-0.00119277/(x+0.024928))+(-0.0050183/(x+0.0646627))+(-0.0226449/(x+0.171576))+(-0.123767/(x+0.491792))+(-1.1705/(x+1.78667))+(-102.992/(x+18.4866));
f(x) = 0.14676+(0.00952992/(x+5.40933e-05))+(0.0115952/(x+0.000559699))+(0.0161824/(x+0.00203338))+(0.0243252/(x+0.00582831))+(0.0379533/(x+0.0154649))+(0.060699/(x+0.0401156))+(0.100345/(x+0.104788))+(0.178335/(x+0.286042))+(0.381586/(x+0.892189))+(1.42625/(x+4.38422));