Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-10 07:55:35 +00:00)

Commit fb81acca3c by Jung, 2015-12-03 12:11:10 -05:00
233 changed files with 33004 additions and 12555 deletions

.gitignore (64 lines changed)

@ -1,55 +1,66 @@
# Compiled Object files
# Compiled Object files #
#########################
*.slo
*.lo
*.o
*.obj
# Editor files #
################
*~
errs
*#
# Precompiled Headers
# Precompiled Headers #
#######################
*.gch
*.pch
# Compiled Dynamic libraries
# Compiled Dynamic libraries #
##############################
*.so
*.dylib
*.dll
# Fortran module files
# Fortran module files #
########################
*.mod
# Compiled Static libraries
# Compiled Static libraries #
#############################
*.lai
*.la
*.a
*.lib
# Executables
# Executables #
###############
*.exe
*.out
*.app
# http://www.gnu.org/software/automake
# http://www.gnu.org/software/automake #
########################################
Makefile.in
Makefile
Config.h
config.log
config.status
.deps
# http://www.gnu.org/software/autoconf
/autom4te.cache
/aclocal.m4
/compile
/configure
/depcomp
/install-sh
/missing
/stamp-h1
/config.sub
/config.guess
/INSTALL
# http://www.gnu.org/software/autoconf #
########################################
autom4te.cache
aclocal.m4
compile
configure
depcomp
install-sh
missing
stamp-h1
config.sub
config.guess
INSTALL
# Packages #
############
@ -78,4 +89,13 @@ config.status
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# build directory #
###################
build/*
# IDE related files #
#####################
*.xcodeproj/*
build.sh

INSTALL (deleted)

@ -1 +0,0 @@
/usr/share/automake-1.14/INSTALL

TODO (125 lines changed)

@ -1,15 +1,61 @@
RECENT
---------------
- Clean up HMC -- DONE
- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
- Simplified the integrators a bit. -- DONE
- Multi-timescale looks broken and operating on single timescale for now. -- DONE
- pass GaugeField as template param. -- DONE
- Reunitarise -- DONE
- Force Gradient -- DONE
- Prefer "RefreshInternal" or such like to "init" in naming -- DONE
- Parallel io improvements -- DONE
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
TODO:
---------------
Policies:
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
* Support different boundary conditions (finite temp, chem. potential ... )
* Support different fermion representations?
- contained entirely within the integrator presently
- Sign of force term.
- Reversibility test.
- Rename "Ta" as too unclear
- Lanczos
- Rectangle gauge actions.
Iwasaki,
Symanzik,
... etc...
- Prepare multigrid for HMC. - Alternate setup schemes.
- Support for ILDG --- ugly, not done
- Flavour matrices?
- FFTnD ?
================================================================
*** Hacks and bug fixes to clean up and Audits
* Hacks and bug fixes to clean up and Audits
================================================================
* Extract/merge/set cleanup ; too many variants; rationalise and call simpler ones
* Used #define repetitive sequences to minimise code.
* Rewrite core tensor arithmetic support to be more systematic
= Use #define repetitive sequences to minimise code, decrease line count by thousands possible,
with more robust and maintainable implementation.
* Ensure we ET as much as possible; move unop functions into ET framework.
- tests with expression args to all functions
* FIXME audit
* const audit
Insert/Extract
@ -22,10 +68,12 @@ Insert/Extract
* Thread scaling tests Xeon, XeonPhi
** Make the Tensor types and Complex etc... play more nicely.
Not sure of status of this -- reverify. Things are working nicely now though.
* Make the Tensor types and Complex etc... play more nicely.
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
QDP forces use of "toDouble" to get back to a non-tensor scalar. This role is presently taken by TensorRemove, but I
want to introduce a syntax that does not require this.
- Reductions that contract indices on a site should always demote the tensor structure.
norm2(), innerProduct.
@ -38,6 +86,37 @@ Insert/Extract
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
pure scalar.
======================================================================
======================================================================
======================================================================
======================================================================
Done: Cayley, Partial , ContFrac force terms.
DONE
- PseudoFermions
=> generalise to non-const EE ; likely defer (??) (NOT DONE)
Done:
- TwoFlavour
- TwoFlavourEvenOdd
- TwoFlavourRatio
- TwoFlavourRatioEvenOdd
Done:
- OneFlavourRationalEvenOdd
- OneFlavourRationalRatioEvenOdd
- OneFlavourRationalRatio
Done
=> Test DWF HMC
- Fix a threading bug that has been introduced and prevents HMC running hybrid OMP mode
Done:
- RNG filling from sparser grid, lower dim grid.
DONE
- MacroMagic -> virtual reader class.
*** Expression template engine: -- DONE
[ -- Norm2(expression) problem: introduce norm2 unary op, or Introduce conversion automatic from expression to Lattice<vobj>
@ -54,28 +133,13 @@ Insert/Extract
// localMaxAbs
// Fourier transform equivalent.]
================================================================
*** New Functionality
================================================================
* - BinaryWriter, TextWriter etc...
- use protocol buffers? replace xmlReader/Writer ec..
- Binary use htonll, htonl
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
* Parallel io improvements
- optional parallel MPI2 IO
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
* Support for ILDG
* Support different boundary conditions (finite temp, chem. potential ... )
* Support different fermion representations?
Actions -- coherent framework for implementing actions and their forces.
-- coherent framework for implementing actions and their forces.
Actions
DONE
* Fermion
- Wilson
- Clover
@ -83,6 +147,7 @@ Actions -- coherent framework for implementing actions and their forces.
- Mobius
- z-Mobius
Algorithms (lots of reuse/port from BFM)
* LinearOperator
* LinearSolver
@ -100,17 +165,10 @@ Algorithms (lots of reuse/port from BFM)
* Integrators, leapfrog, omelyan, force gradient etc...
* etc..
* Gauge
- Wilson, symanzik, iwasaki
* rb4d support for 5th dimension in Mobius.
* Flavour matrices?
Done
* Pauli, SU subgroup, etc..
* su3 exponentiation & log etc.. [Jamie's code?]
* TaProj
* FFTnD ?
* su3 exponentiation & log etc.. [Jamie's code?]
======================================================================================================
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
@ -144,7 +202,6 @@ FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me argua
- lib/communicator
- lib/algorithms
- lib/qcd
future
- lib/io/ -- GridLog, GridIn, GridErr, GridDebug, GridMessage
- lib/qcd/actions
- lib/qcd/measurements

benchmarks/Benchmark_comms.cc

@ -11,22 +11,25 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
int threads = GridThread::GetThreads();
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
int Nloop=10;
int nmu=0;
for(int mu=0;mu<4;mu++) if (mpi_layout[mu]>1) nmu++;
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=32;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
@ -84,15 +87,15 @@ int main (int argc, char ** argv)
double time = stop-start; // microseconds
std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=32;lat+=2){
@ -160,7 +163,7 @@ int main (int argc, char ** argv)
double time = stop-start;
std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
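The MB/s columns above come straight out of byte counts over microsecond timings; a minimal sketch of that arithmetic (standalone helper, name assumed, not Grid API):

inline double rate_MBs(double bytes, double time_us) {
  // usecond() timestamps are microseconds, so bytes per microsecond
  // is 1e6 bytes/s, i.e. (decimal) MB/s; the bidirectional figure
  // simply counts send and receive bytes together.
  return bytes / time_us;
}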

benchmarks/Benchmark_dwf.cc

@ -21,11 +21,11 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt4 = GridDefaultLatt();
const int Ls=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
const int Ls=16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
@ -59,7 +59,7 @@ int main (int argc, char ** argv)
////////////////////////////////////
std::vector<LatticeColourMatrix> U(4,FGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = peekIndex<LorentzIndex>(Umu5d,mu);
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
}
if (1)
@ -79,25 +79,28 @@ int main (int argc, char ** argv)
RealD mass=0.1;
RealD M5 =1.8;
DomainWallFermion Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout << "Calling Dw"<<std::endl;
int ncall=10;
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.Dhop(src,result,0);
}
double t1=usecond();
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=10000;
{
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.Dhop(src,result,0);
}
double t1=usecond();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout << "Called Dw"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Dw.Report();
}
if (1)
@ -120,11 +123,11 @@ int main (int argc, char ** argv)
ref = -0.5*ref;
}
Dw.Dhop(src,result,1);
std::cout << "Called DwDag"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
err = ref-result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
@ -133,24 +136,44 @@ int main (int argc, char ** argv)
LatticeFermion r_eo (FGrid);
std::cout << "Calling Deo and Doe"<<std::endl;
std::cout<<GridLogMessage << "Calling Deo and Doe"<<std::endl;
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
{
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
}
double t1=usecond();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
}
Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo);
Dw.Dhop(src,result,DaggerNo);
Dw.Dhop (src ,result,DaggerNo);
std::cout<<GridLogMessage << "r_e"<<norm2(r_e)<<std::endl;
std::cout<<GridLogMessage << "r_o"<<norm2(r_o)<<std::endl;
std::cout<<GridLogMessage << "res"<<norm2(result)<<std::endl;
setCheckerboard(r_eo,r_o);
setCheckerboard(r_eo,r_e);
err = r_eo-result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
pickCheckerboard(Even,src_e,err);
pickCheckerboard(Odd,src_o,err);
std::cout << "norm diff even "<< norm2(src_e)<<std::endl;
std::cout << "norm diff odd "<< norm2(src_o)<<std::endl;
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
Grid_finalize();
}
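The Mflop/s figure above rests on the standard Wilson dslash count of 1344 floating point operations per lattice site; a minimal restatement of the figure of merit (helper name assumed, not Grid API):

inline double dslash_mflops(double volume, int ncall, double t0_us, double t1_us) {
  double flops = 1344.0 * volume * ncall; // 1344 flops per 5d site (volume = Ls * V4) per application
  return flops / (t1_us - t0_us);         // flops per microsecond == Mflop/s
}

The even/odd variants (DhopEO/DhopOE) act on one checkerboard only, which is why the benchmark halves the count in its flops=(1344.0*volume*ncall)/2 line.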

benchmarks/Benchmark_memory_asynch.cc (new file)

@ -0,0 +1,84 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
const int Nvec=8;
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
typedef iVector<vReal,Nvec> Vec;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"bytes/thread"<<"\t\t\t"<<"GB/s"<<"\t\t\t"<<"GB/s per thread"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
const int lmax = 16536*16;
for(int lat=4;lat<=lmax;lat*=2){
int Nloop=lmax*4/lat;
std::vector<int> latt_size ({2*mpi_layout[0],2*mpi_layout[1],4*mpi_layout[2],lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]*threads;
Vec tsum; tsum = zero;
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
std::vector<double> stop(threads);
Vector<Vec> sum(threads);
std::vector<LatticeVec> x(threads,&Grid);
for(int t=0;t<threads;t++){
// random(pRNG,x[t]);
}
double start=usecond();
PARALLEL_FOR_LOOP
for(int t=0;t<threads;t++){
sum[t] = x[t]._odata[0];
for(int i=0;i<Nloop;i++){
for(auto ss=x[t].begin();ss<x[t].end();ss++){
sum[t] = sum[t]+x[t]._odata[ss];
}
}
stop[t]=usecond();
}
double max_stop=stop[0];
double min_stop=stop[0];
for(int t=0;t<threads;t++){
tsum+=sum[t];
if ( stop[t]<min_stop ) min_stop=stop[t];
if ( stop[t]>max_stop ) max_stop=stop[t];
}
double max_time = (max_stop-start)/Nloop*1000;
double min_time = (min_stop-start)/Nloop*1000;
double bytes=vol*Nvec*sizeof(Real);
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/threads<<"\t\t"<<bytes/max_time<<" - "<< bytes/min_time<<"\t\t"<<bytes/min_time/threads <<std::endl;
}
Grid_finalize();
}
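The GB/s columns above follow from the time conversion in max_time/min_time: per-iteration microseconds are multiplied by 1000 to give nanoseconds, and bytes per nanosecond is (decimal) GB/s. A minimal sketch (helper name assumed):

inline double rate_GBs(double bytes, double elapsed_us, int nloop) {
  double time_ns = elapsed_us / nloop * 1000.0; // ns per iteration
  return bytes / time_ns;                       // 1 byte/ns = 1e9 bytes/s = 1 GB/s
}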

benchmarks/Benchmark_memory_bandwidth.cc

@ -10,28 +10,34 @@ int main (int argc, char ** argv)
const int Nvec=8;
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
typedef iVector<vReal,Nvec> Vec;
int Nloop=1000;
Vec rn = zero;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
int threads = GridThread::GetThreads();
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking fused AXPY bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
std::cout << "----------------------------------------------------------"<<std::endl;
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
for(int lat=4;lat<=32;lat+=4){
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking fused AXPY bandwidth ; sizeof(Real) "<<sizeof(Real)<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
uint64_t lmax=44;
#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
for(int lat=4;lat<=lmax;lat+=4){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
uint64_t Nloop=NLOOP;
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@ -41,63 +47,70 @@ int main (int argc, char ** argv)
double start=usecond();
for(int i=0;i<Nloop;i++){
// inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
axpy(z,a,x,y);
x._odata[0]=z._odata[0]; // serial loop dependence to prevent optimise
y._odata[4]=z._odata[4];
}
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking a*x + y bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
std::cout << "----------------------------------------------------------"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=4;lat<=32;lat+=4){
for(int lat=4;lat<=lmax;lat+=4){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
double a=2.0;
uint64_t Nloop=NLOOP;
double start=usecond();
for(int i=0;i<Nloop;i++){
z=a*x-y;
x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away
y._odata[4]=z._odata[4];
}
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking SCALE bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SCALE bandwidth"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
for(int lat=4;lat<=lmax;lat+=4){
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@ -108,46 +121,47 @@ int main (int argc, char ** argv)
double start=usecond();
for(int i=0;i<Nloop;i++){
z=a*x;
x._odata[0]=z._odata[0]*2.0;
}
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double bytes=2*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*1;// mul
std::cout <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking READ bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
std::cout << "----------------------------------------------------------"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=4;lat<=32;lat+=4){
for(int lat=4;lat<=lmax;lat+=4){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
RealD a=2.0;
ComplexD nn;
Real nn;
double start=usecond();
for(int i=0;i<Nloop;i++){
nn=norm2(x);
vsplat(x._odata[0]._internal[0],nn);
}
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double bytes=vol*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
}
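The NLOOP macro above scales the iteration count inversely with the lattice volume, so every point in the sweep streams roughly the same total data (about lmax^4 sites' worth) and runs for a comparable time. As a sketch of that choice:

inline uint64_t nloop_for(uint64_t lmax, uint64_t vol) {
  return lmax * lmax * lmax * lmax / vol; // constant total work across lattice sizes
}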

benchmarks/Benchmark_su3.cc

@ -14,15 +14,15 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout = GridDefaultMpi();
int threads = GridThread::GetThreads();
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout << "----------------------------------------------------------"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=24;lat+=2){
for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -43,18 +43,18 @@ int main (int argc, char ** argv)
double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
double footprint=2.0*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6.0+8.0+8.0)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<footprint<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<footprint<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking SU3xSU3 z= x*y"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout << "----------------------------------------------------------"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x*y"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=24;lat+=2){
for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -75,17 +75,17 @@ int main (int argc, char ** argv)
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking SU3xSU3 mult(z,x,y)"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout << "----------------------------------------------------------"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 mult(z,x,y)"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=24;lat+=2){
for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -106,17 +106,17 @@ int main (int argc, char ** argv)
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking SU3xSU3 mac(z,x,y)"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout << "----------------------------------------------------------"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 mac(z,x,y)"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=24;lat+=2){
for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -137,7 +137,7 @@ int main (int argc, char ** argv)
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(8+8+8)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
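The per-site flop counts above decompose as one complex multiply (6 flops) plus two complex multiply-accumulates (8 flops each: 6 multiply + 2 add) for each of the Nc*Nc output elements, hence Nc*Nc*(6+8+8); mac() also accumulates into the destination, which turns the first term into 8 flops as well. A sketch of the arithmetic (helper name assumed):

inline double su3_flops_per_site(int Nc, bool mac) {
  int first = mac ? 8 : 6;                  // mac() accumulates on the first term too
  return double(Nc) * Nc * (first + 8 + 8); // 198 (mult) or 216 (mac) for Nc = 3
}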

benchmarks/Benchmark_wilson.cc

@ -22,13 +22,16 @@ int main (int argc, char ** argv)
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
int threads = GridThread::GetThreads();
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
@ -55,15 +58,15 @@ int main (int argc, char ** argv)
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
if(0) {
if (nn==-1) { U[nn]=zero; std::cout << "zeroing gauge field in dir "<<nn<<std::endl; }
else { U[nn] = cone;std::cout << "unit gauge field in dir "<<nn<<std::endl; }
if (nn==-1) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
else { U[nn] = cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
}
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
#endif
for(int mu=0;mu<Nd;mu++){
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
{ // Naive wilson implementation
@ -84,10 +87,10 @@ int main (int argc, char ** argv)
}
ref = -0.5*ref;
RealD mass=0.1;
WilsonFermion Dw(Umu,Grid,RBGrid,mass);
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
std::cout << "Calling Dw"<<std::endl;
int ncall=10000;
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.Dhop(src,result,0);
@ -95,12 +98,12 @@ int main (int argc, char ** argv)
double t1=usecond();
double flops=1344*volume*ncall;
std::cout << "Called Dw"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Called Dw"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
// for(int ss=0;ss<10;ss++ ){
@ -109,7 +112,7 @@ int main (int argc, char ** argv)
for(int j=0;j<Nc;j++){
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
ComplexF * res_p = (ComplexF *)&result._odata[ss]()(i)(j);
std::cout << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
std::cout<<GridLogMessage << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
}
}
}
@ -133,11 +136,11 @@ int main (int argc, char ** argv)
}
ref = -0.5*ref;
Dw.Dhop(src,result,1);
std::cout << "Called DwDag"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
err = ref-result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}

benchmarks/Makefile.am

@ -1,5 +1,5 @@
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
Benchmark_comms_SOURCES=Benchmark_comms.cc
@ -10,6 +10,10 @@ Benchmark_dwf_SOURCES=Benchmark_dwf.cc
Benchmark_dwf_LDADD=-lGrid
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
Benchmark_memory_asynch_LDADD=-lGrid
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
Benchmark_memory_bandwidth_LDADD=-lGrid

configure (vendored, 8439 lines changed)

File diff suppressed because it is too large.

configure.ac

@ -3,7 +3,7 @@
#
# Project Grid package
#
# Time-stamp: <2015-06-09 15:26:39 neo>
# Time-stamp: <2015-07-10 17:46:21 neo>
AC_PREREQ([2.63])
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
@ -11,7 +11,7 @@ AC_CANONICAL_SYSTEM
AM_INIT_AUTOMAKE(subdir-objects)
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/GridConfig.h])
AC_CONFIG_HEADERS([lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_MSG_NOTICE([
@ -26,10 +26,9 @@ AC_LANG(C++)
AC_PROG_CXX
AC_OPENMP
AC_PROG_RANLIB
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
AX_EXT
# Checks for libraries.
#AX_GCC_VAR_ATTRIBUTE(aligned)
@ -39,6 +38,7 @@ AC_CHECK_HEADERS(mm_malloc.h)
AC_CHECK_HEADERS(malloc/malloc.h)
AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(endian.h)
AC_CHECK_HEADERS(execinfo.h)
AC_CHECK_HEADERS(gmp.h)
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
@ -56,19 +56,18 @@ echo :::::::::::::::::::::::::::::::::::::::::::
AC_CHECK_FUNCS([gettimeofday])
AC_CHECK_LIB([gmp],[__gmpf_init],,
[AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
Please install or provide the correct path to your installation
Info at: http://www.gmplib.org)])
#AC_CHECK_LIB([gmp],[__gmpf_init],,
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
#Please install or provide the correct path to your installation
#Info at: http://www.gmplib.org)])
AC_CHECK_LIB([mpfr],[mpfr_init],,
[AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
Please install or provide the correct path to your installation
Info at: http://www.mpfr.org/)])
#AC_CHECK_LIB([mpfr],[mpfr_init],,
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
#Please install or provide the correct path to your installation
#Info at: http://www.mpfr.org/)])
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
supported=no
@ -92,6 +91,15 @@ case ${ac_SIMD} in
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVXFMA4)
echo Configuring for AVX
AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVX2)
echo Configuring for AVX2
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
@ -101,14 +109,19 @@ case ${ac_SIMD} in
AC_MSG_WARN([Your processor does not support AVX2 instructions])
fi
;;
AVX512|MIC)
echo Configuring for AVX512 and MIC
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] )
AVX512)
echo Configuring for AVX512
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
supported="cross compilation"
;;
NEONv7)
echo Configuring for experimental ARMv7 support
AC_DEFINE([NEONv7],[1],[NEON ARMv7 Experimental support ] )
IMCI)
echo Configuring for IMCI
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
supported="cross compilation"
;;
NEONv8)
echo Configuring for experimental ARMv8a support
AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
supported="cross compilation"
;;
DEBUG)
@ -120,6 +133,17 @@ case ${ac_SIMD} in
;;
esac
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
case ${ac_PRECISION} in
single)
echo default precision is single
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
;;
double)
echo default precision is double
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
;;
esac
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
@ -144,15 +168,15 @@ AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
###################################################################
# Checks for doxygen support
# if present enables the "make doxyfile" command
echo
echo Checking doxygen support
echo :::::::::::::::::::::::::::::::::::::::::::
AC_PROG_DOXYGEN
#echo
#echo Checking doxygen support
#echo :::::::::::::::::::::::::::::::::::::::::::
#AC_PROG_DOXYGEN
if test -n "$DOXYGEN"
then
AC_CONFIG_FILES([docs/doxy.cfg])
fi
#if test -n "$DOXYGEN"
#then
#AC_CONFIG_FILES([docs/doxy.cfg])
#fi
echo
echo Creating configuration files
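The new --enable-precision switch above defines exactly one of GRID_DEFAULT_PRECISION_SINGLE or GRID_DEFAULT_PRECISION_DOUBLE in Config.h. A hedged sketch of how such a macro typically gates the default Real width (illustrative only, not the actual Grid typedef site):

#ifdef GRID_DEFAULT_PRECISION_DOUBLE
typedef double Real;   // sizeof(Real) == 8, as the benchmarks report
#else
typedef float  Real;   // sizeof(Real) == 4
#endif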


@ -29,12 +29,12 @@ public:
template<int N,class obj,typename std::enable_if<N==obj::NestLevel >::type * = nullptr > auto function(const obj &arg)-> obj
{
std::cout<<"Leaf "<<obj::NestLevel<<std::endl;
std::cout<<GridLogMessage<<"Leaf "<<obj::NestLevel<<std::endl;
return arg;
}
template<int N,class obj,typename std::enable_if<N!=obj::NestLevel >::type * = nullptr > auto function(const obj &arg)-> obj
{
std::cout<<"Node "<<obj::NestLevel<<std::endl;
std::cout<<GridLogMessage<<"Node "<<obj::NestLevel<<std::endl;
obj ret;
ret.internal=function<N>(arg.internal);
return ret;

lib/Algorithms.h

@ -3,7 +3,7 @@
#include <algorithms/SparseMatrix.h>
#include <algorithms/LinearOperator.h>
#include <algorithms/CoarsenedMatrix.h>
#include <algorithms/Preconditioner.h>
#include <algorithms/approx/Zolotarev.h>
#include <algorithms/approx/Chebyshev.h>
@ -17,6 +17,12 @@
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
// Lanczos support
#include <algorithms/iterative/MatrixUtils.h>
#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <algorithms/CoarsenedMatrix.h>
// Eigen/lanczos
// EigCg
// MCR

lib/AlignedAllocator.h

@ -1,6 +1,13 @@
#ifndef GRID_ALIGNED_ALLOCATOR_H
#define GRID_ALIGNED_ALLOCATOR_H
#ifdef HAVE_MALLOC_MALLOC_H
#include <malloc/malloc.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <immintrin.h>
#ifdef HAVE_MM_MALLOC_H
#include <mm_malloc.h>
@ -65,7 +72,6 @@ operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return t
template<typename _Tp> inline bool
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
}; // namespace Grid
#endif
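A hypothetical use of the allocator declared here, dropping it into a standard container for aligned backing storage (container and element type are illustrative assumptions):

#include <vector>
std::vector<float, Grid::alignedAllocator<float>> buf(256); // aligned element storage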

lib/Config.h.in

@ -1,4 +1,4 @@
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
/* lib/Config.h.in. Generated from configure.ac by autoheader. */
/* AVX Intrinsics */
#undef AVX1
@ -6,9 +6,12 @@
/* AVX2 Intrinsics */
#undef AVX2
/* AVX512 Intrinsics for Knights Corner */
/* AVX512 Intrinsics for Knights Landing */
#undef AVX512
/* AVX Intrinsics with FMA4 */
#undef AVXFMA4
/* EMPTY_SIMD only for DEBUGGING */
#undef EMPTY_SIMD
@ -18,6 +21,12 @@
/* GRID_COMMS_NONE */
#undef GRID_COMMS_NONE
/* GRID_DEFAULT_PRECISION is DOUBLE */
#undef GRID_DEFAULT_PRECISION_DOUBLE
/* GRID_DEFAULT_PRECISION is SINGLE */
#undef GRID_DEFAULT_PRECISION_SINGLE
/* Support Altivec instructions */
#undef HAVE_ALTIVEC
@ -27,9 +36,6 @@
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
#undef HAVE_AVX2
/* define if the compiler supports basic C++11 syntax */
#undef HAVE_CXX11
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
don't. */
#undef HAVE_DECL_BE64TOH
@ -41,6 +47,9 @@
/* Define to 1 if you have the <endian.h> header file. */
#undef HAVE_ENDIAN_H
/* Define to 1 if you have the <execinfo.h> header file. */
#undef HAVE_EXECINFO_H
/* Support FMA3 (Fused Multiply-Add) instructions */
#undef HAVE_FMA
@ -53,12 +62,6 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `gmp' library (-lgmp). */
#undef HAVE_LIBGMP
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
#undef HAVE_LIBMPFR
/* Define to 1 if you have the <malloc.h> header file. */
#undef HAVE_MALLOC_H
@ -113,8 +116,11 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* NEON ARMv7 Experimental support */
#undef NEONv7
/* IMCI Intrinsics for Knights Corner */
#undef IMCI
/* NEON ARMv8 Experimental support */
#undef NEONv8
/* Name of package */
#undef PACKAGE
@ -131,9 +137,6 @@
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION

lib/Grid.h

@ -6,92 +6,49 @@
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
//
#ifndef GRID_H
#define GRID_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#define MIN(x,y) ((x)>(y)?(y):(x))
#endif
#define strong_inline __attribute__((always_inline)) inline
#include <GridConfig.h>
////////////////////////////////////////////////////////////
// Tunable header includes
////////////////////////////////////////////////////////////
#ifdef HAVE_MALLOC_MALLOC_H
#include <malloc/malloc.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
///////////////////
// Grid headers
///////////////////
#include <serialisation/Serialisation.h>
#include <Config.h>
#include <Timer.h>
#include <Log.h>
#include <AlignedAllocator.h>
#include <Simd.h>
#include <Threads.h>
#include <Communicator.h> // subdir aggregate
#include <Cartesian.h> // subdir aggregate
#include <Tensors.h> // subdir aggregate
#include <Lattice.h> // subdir aggregate
#include <Cshift.h> // subdir aggregate
#include <Stencil.h> // subdir aggregate
#include <Algorithms.h>// subdir aggregate
#include <Communicator.h>
#include <Cartesian.h>
#include <Tensors.h>
#include <Lattice.h>
#include <Cshift.h>
#include <Stencil.h>
#include <Algorithms.h>
#include <qcd/QCD.h>
#include <parallelIO/BinaryIO.h>
#include <parallelIO/NerscIO.h>
namespace Grid {
#include <Init.h>
void Grid_init(int *argc,char ***argv);
void Grid_finalize(void);
// internal, controlled with --handle
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
void Grid_debug_handler_init(void);
void Grid_quiesce_nodes(void);
void Grid_unquiesce_nodes(void);
// C++11 time facilities better?
double usecond(void);
const std::vector<int> GridDefaultSimd(int dims,int nsimd);
const std::vector<int> &GridDefaultLatt(void);
const std::vector<int> &GridDefaultMpi(void);
const int &GridThreads(void) ;
void GridSetThreads(int t) ;
// Common parsing chores
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
bool GridCmdOptionExists(char** begin, char** end, const std::string& option);
std::string GridCmdVectorIntToString(const std::vector<int> & vec);
void GridParseLayout(char **argv,int argc,
std::vector<int> &latt,
std::vector<int> &simd,
std::vector<int> &mpi);
};
#endif

lib/GridConfig.h (deleted)

@ -1,169 +0,0 @@
/* lib/GridConfig.h. Generated from GridConfig.h.in by configure. */
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
/* AVX Intrinsics */
/* #undef AVX1 */
/* AVX2 Intrinsics */
/* #undef AVX2 */
/* AVX512 Intrinsics for Knights Corner */
/* #undef AVX512 */
/* EMPTY_SIMD only for DEBUGGING */
/* #undef EMPTY_SIMD */
/* GRID_COMMS_MPI */
/* #undef GRID_COMMS_MPI */
/* GRID_COMMS_NONE */
#define GRID_COMMS_NONE 1
/* Support Altivec instructions */
/* #undef HAVE_ALTIVEC */
/* Support AVX (Advanced Vector Extensions) instructions */
/* #undef HAVE_AVX */
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
/* #undef HAVE_AVX2 */
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
don't. */
#define HAVE_DECL_BE64TOH 1
/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't.
*/
#define HAVE_DECL_NTOHLL 0
/* Define to 1 if you have the <endian.h> header file. */
#define HAVE_ENDIAN_H 1
/* Support FMA3 (Fused Multiply-Add) instructions */
/* #undef HAVE_FMA */
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
/* Define to 1 if you have the <gmp.h> header file. */
#define HAVE_GMP_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `gmp' library (-lgmp). */
#define HAVE_LIBGMP 1
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
#define HAVE_LIBMPFR 1
/* Define to 1 if you have the <malloc.h> header file. */
#define HAVE_MALLOC_H 1
/* Define to 1 if you have the <malloc/malloc.h> header file. */
/* #undef HAVE_MALLOC_MALLOC_H */
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Support mmx instructions */
#define HAVE_MMX /**/
/* Define to 1 if you have the <mm_malloc.h> header file. */
#define HAVE_MM_MALLOC_H 1
/* Support SSE (Streaming SIMD Extensions) instructions */
#define HAVE_SSE /**/
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
#define HAVE_SSE2 /**/
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
#define HAVE_SSE3 /**/
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
#define HAVE_SSE4_1 /**/
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
#define HAVE_SSE4_2 /**/
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
#define HAVE_SSSE3 /**/
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* NEON ARMv7 Experimental support */
/* #undef NEONv7 */
/* Name of package */
#define PACKAGE "grid"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "paboyle@ph.ed.ac.uk"
/* Define to the full name of this package. */
#define PACKAGE_NAME "Grid"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "Grid 1.0"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "grid"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.0"
/* SSE4 Intrinsics */
#define SSE4 1
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Version number of package */
#define VERSION "1.0"
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
/* #undef _UINT32_T */
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
/* #undef _UINT64_T */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */
/* Define to the type of an unsigned integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
/* #undef uint32_t */
/* Define to the type of an unsigned integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
/* #undef uint64_t */


@ -1,7 +1,6 @@
/****************************************************************************/
/* pab: Signal magic. Processor state dump is x86-64 specific */
/****************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@ -16,26 +15,29 @@
#include <algorithm>
#include <iterator>
#undef __X86_64
#define MAC
#define __X86_64
#ifdef MAC
#ifdef HAVE_EXECINFO_H
#include <execinfo.h>
#endif
namespace Grid {
//////////////////////////////////////////////////////
// Convenience functions to access standard command line arg
// driven parallelism controls
//////////////////////////////////////////////////////
static std::vector<int> Grid_default_latt;
static std::vector<int> Grid_default_mpi;
int GridThread::_threads;
int GridThread::_threads =1;
int GridThread::_hyperthreads=1;
int GridThread::_cores=1;
const std::vector<int> GridDefaultSimd(int dims,int nsimd)
{
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
const std::vector<int> GridDefaultSimd(int dims,int nsimd)
{
std::vector<int> layout(dims);
int nn=nsimd;
for(int d=dims-1;d>=0;d--){
@ -48,15 +50,11 @@ namespace Grid {
}
assert(nn==1);
return layout;
}
}
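GridDefaultSimd folds the nsimd SIMD lanes over the lattice dimensions starting from the last; assuming the (elided) loop body splits off factors of two per dimension, a worked example:

// dims = 4, nsimd = 8  ->  layout = {1,2,2,2}
// (2*2*2 lanes absorb nsimd, and the assert(nn==1) then passes)
std::vector<int> layout = GridDefaultSimd(4, 8);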
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
////////////////////////////////////////////////////////////
// Command line parsing assist for stock controls
////////////////////////////////////////////////////////////
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option)
{
char ** itr = std::find(begin, end, option);
@ -70,6 +68,23 @@ bool GridCmdOptionExists(char** begin, char** end, const std::string& option)
{
return std::find(begin, end, option) != end;
}
// Comma separated list
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec)
{
size_t pos = 0;
std::string token;
std::string delimiter(",");
vec.resize(0);
while ((pos = str.find(delimiter)) != std::string::npos) {
token = str.substr(0, pos);
vec.push_back(token);
str.erase(0, pos + delimiter.length());
}
token = str;
vec.push_back(token);
return;
}
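For example, the default stream configuration a few lines below resolves via this parser as:

std::vector<std::string> streams;
GridCmdOptionCSL(std::string("Error,Warning,Message,Performance"), streams);
// streams == {"Error","Warning","Message","Performance"}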
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
{
@ -84,6 +99,7 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
return;
}
void GridParseLayout(char **argv,int argc,
std::vector<int> &latt,
std::vector<int> &mpi)
@ -102,13 +118,19 @@ void GridParseLayout(char **argv,int argc,
arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
GridCmdOptionIntVector(arg,latt);
}
if( GridCmdOptionExists(argv,argv+argc,"--omp") ){
if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
std::vector<int> ompthreads(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--omp");
arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
GridCmdOptionIntVector(arg,ompthreads);
assert(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]);
}
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
std::vector<int> cores(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
GridCmdOptionIntVector(arg,cores);
GridThread::SetCores(cores[0]);
}
}
@ -117,8 +139,9 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
std::copy(vec.begin(), vec.end(),std::ostream_iterator<int>(oss, " "));
return oss.str();
}
/////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////
//
/////////////////////////////////////////////////////////
void Grid_init(int *argc,char ***argv)
{
#ifdef GRID_COMMS_MPI
@ -126,15 +149,33 @@ void Grid_init(int *argc,char ***argv)
#endif
// Parse command line args.
GridLogger::StopWatch.Start();
std::string arg;
std::vector<std::string> logstreams;
std::string defaultLog("Error,Warning,Message,Performance");
GridCmdOptionCSL(defaultLog,logstreams);
GridLogConfigure(logstreams);
if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
std::cout<<"--help : this message"<<std::endl;
std::cout<<"--debug-signals : catch sigsegv and print a blame report"<<std::endl;
std::cout<<"--debug-stdout : print stdout from EVERY node"<<std::endl;
std::cout<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
std::cout<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
std::cout<<"--omp n : default number of OMP threads"<<std::endl;
std::cout<<"--grid n.n.n.n : default Grid size"<<std::endl;
std::cout<<GridLogMessage<<"--help : this message"<<std::endl;
std::cout<<GridLogMessage<<"--debug-signals : catch sigsegv and print a blame report"<<std::endl;
std::cout<<GridLogMessage<<"--debug-stdout : print stdout from EVERY node"<<std::endl;
std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
std::cout<<GridLogMessage<<"--omp n : default number of OMP threads"<<std::endl;
std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl;
std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Debug"<<std::endl;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log");
GridCmdOptionCSL(arg,logstreams);
GridLogConfigure(logstreams);
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init();
}
@ -142,48 +183,33 @@ void Grid_init(int *argc,char ***argv)
Grid_quiesce_nodes();
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
WilsonFermion::HandOptDslash=1;
WilsonFermion5D::HandOptDslash=1;
QCD::WilsonFermionStatic::HandOptDslash=1;
QCD::WilsonFermion5DStatic::HandOptDslash=1;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
LebesgueOrder::UseLebesgueOrder=1;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
}
GridParseLayout(*argv,*argc,
Grid_default_latt,
Grid_default_mpi);
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
std::cout<<"Grid Decomposition\n";
std::cout<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
std::cout<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
std::cout<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
std::cout<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
std::cout<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
std::cout<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"Grid Decomposition\n";
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
}
}
////////////////////////////////////////////////////////////
// Verbose limiter on MPI tasks
////////////////////////////////////////////////////////////
void Grid_quiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
int me;
MPI_Comm_rank(MPI_COMM_WORLD,&me);
if ( me ) {
std::cout.setstate(std::ios::badbit);
}
#endif
}
void Grid_unquiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
std::cout.clear();
#endif
}
void Grid_finalize(void)
{
@ -207,11 +233,14 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
printf(" mem address %llx\n",(unsigned long long)si->si_addr);
printf(" code %d\n",si->si_code);
#ifdef __X86_64
// Linux/Posix
#ifdef __linux__
// And x86 64bit
ucontext_t * uc= (ucontext_t *)ptr;
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
printf(" instruction %llx\n",(unsigned long long)sc->rip);
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
REG(rdi);
REG(rsi);
REG(rbp);
@ -232,7 +261,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
REG(r14);
REG(r15);
#endif
#ifdef MAC
#ifdef HAVE_EXECINFO_H
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);
for (int i = 0; i < symbols; i++){

32
lib/Init.h Normal file
View File

@ -0,0 +1,32 @@
#ifndef GRID_INIT_H
#define GRID_INIT_H
namespace Grid {
void Grid_init(int *argc,char ***argv);
void Grid_finalize(void);
// internal, controlled with --debug-signals
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
void Grid_debug_handler_init(void);
void Grid_quiesce_nodes(void);
void Grid_unquiesce_nodes(void);
const std::vector<int> GridDefaultSimd(int dims,int nsimd);
const std::vector<int> &GridDefaultLatt(void);
const std::vector<int> &GridDefaultMpi(void);
const int &GridThreads(void) ;
void GridSetThreads(int t) ;
// Common parsing chores
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
bool GridCmdOptionExists(char** begin, char** end, const std::string& option);
std::string GridCmdVectorIntToString(const std::vector<int> & vec);
void GridParseLayout(char **argv,int argc,
std::vector<int> &latt,
std::vector<int> &mpi);
};
#endif
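A minimal driver using these declarations (a sketch only; Grid.h pulls in this header, and the flag set consumed is the one documented in Grid_init above):
#include <Grid.h>
int main(int argc,char **argv)
{
  Grid::Grid_init(&argc,&argv);  // consumes --grid/--mpi/--threads/--log etc.
  std::vector<int> latt = Grid::GridDefaultLatt();
  std::vector<int> mpi  = Grid::GridDefaultMpi();
  // ... construct grids and run ...
  Grid::Grid_finalize();
  return 0;
}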

62
lib/Log.cc Normal file
View File

@ -0,0 +1,62 @@
#include <Grid.h>
namespace Grid {
GridStopWatch Logger::StopWatch;
std::ostream Logger::devnull(0);
GridLogger GridLogError (1,"Error");
GridLogger GridLogWarning (1,"Warning");
GridLogger GridLogMessage (1,"Message");
GridLogger GridLogDebug (1,"Debug");
GridLogger GridLogPerformance(1,"Performance");
GridLogger GridLogIterative (1,"Iterative");
void GridLogConfigure(std::vector<std::string> &logstreams)
{
GridLogError.Active(0);
GridLogWarning.Active(0);
GridLogMessage.Active(0);
GridLogIterative.Active(0);
GridLogDebug.Active(0);
GridLogPerformance.Active(0);
for(int i=0;i<logstreams.size();i++){
if ( logstreams[i]== std::string("Error") ) GridLogError.Active(1);
if ( logstreams[i]== std::string("Warning") ) GridLogWarning.Active(1);
if ( logstreams[i]== std::string("Message") ) GridLogMessage.Active(1);
if ( logstreams[i]== std::string("Iterative") ) GridLogIterative.Active(1);
if ( logstreams[i]== std::string("Debug") ) GridLogDebug.Active(1);
if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
}
}
////////////////////////////////////////////////////////////
// Verbose limiter on MPI tasks
////////////////////////////////////////////////////////////
void Grid_quiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
int me;
MPI_Comm_rank(MPI_COMM_WORLD,&me);
if ( me ) {
std::cout.setstate(std::ios::badbit);
}
#endif
}
void Grid_unquiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
std::cout.clear();
#endif
}
std::ostream& operator<< (std::ostream& stream, const GridTime& time)
{
stream << time.count()<<" ms";
return stream;
}
}

54
lib/Log.h Normal file
View File

@ -0,0 +1,54 @@
#ifndef GRID_LOG_H
#define GRID_LOG_H
namespace Grid {
// Dress the output; use std::chrono for time stamping via the StopWatch class
std::ostream& operator<< (std::ostream& stream, const GridTime& time);
class Logger {
protected:
int active;
std::string name, topName;
public:
static GridStopWatch StopWatch;
static std::ostream devnull;
Logger(std::string topNm, int on, std::string nm)
: active(on), name(nm), topName(topNm) {};
void Active(int on) {active = on;};
int isActive(void) {return active;};
friend std::ostream& operator<< (std::ostream& stream, const Logger& log){
if ( log.active ) {
StopWatch.Stop();
GridTime now = StopWatch.Elapsed();
StopWatch.Start();
stream << std::setw(8) << std::left << log.topName << " : ";
stream << std::setw(12) << std::left << log.name << " : ";
stream << now << " : ";
return stream;
} else {
return devnull;
}
}
};
class GridLogger: public Logger {
public:
GridLogger(int on, std::string nm): Logger("Grid", on, nm){};
};
void GridLogConfigure(std::vector<std::string> &logstreams);
extern GridLogger GridLogError;
extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage;
extern GridLogger GridLogDebug ;
extern GridLogger GridLogPerformance;
extern GridLogger GridLogIterative ;
}
#endif
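Typical use of the new streams (a sketch; `plaq` is a placeholder variable): prefix a std::cout chain with a logger object. Inactive streams resolve to Logger::devnull, so disabled output costs almost nothing.
std::cout << GridLogMessage << "Plaquette " << plaq << std::endl;
std::cout << GridLogDebug   << "shown only with --log ...,Debug" << std::endl;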

View File

@ -1,4 +1,4 @@
HFILES=./Cshift.h ./simd/Grid_avx.h ./simd/Grid_vector_types.h ./simd/Grid_sse4.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_vector_unops.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./Tensors.h ./Algorithms.h ./communicator/Communicator_base.h ./lattice/Lattice_rng.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transfer.h ./lattice/Lattice_unary.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_comparison.h ./lattice/Lattice_overload.h ./lattice/Lattice_reality.h ./lattice/Lattice_local.h ./lattice/Lattice_conformable.h ./lattice/Lattice_where.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_ET.h ./lattice/Lattice_transpose.h ./lattice/Lattice_trace.h ./Stencil.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_poke.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_class.h ./tensors/Tensor_logical.h ./tensors/Tensor_transpose.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_inner.h ./tensors/Tensor_traits.h ./tensors/Tensor_Ta.h ./tensors/Tensor_unary.h ./tensors/Tensor_determinant.h ./tensors/Tensor_peek.h ./tensors/Tensor_arith.h ./tensors/Tensor_extract_merge.h ./Communicator.h ./Cartesian.h ./parallelIO/NerscIO.h ./qcd/QCD.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/LinalgUtils.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/spin/TwoSpinor.h ./qcd/spin/Dirac.h ./cshift/Cshift_common.h ./cshift/Cshift_none.h ./cshift/Cshift_mpi.h ./Simd.h ./GridConfig.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_full.h ./AlignedAllocator.h ./Lattice.h ./Threads.h ./Grid.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Remez.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./algorithms/CoarsenedMatrix.h ./stencil/Lebesgue.h
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Config.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h 
./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
CCFILES=./qcd/utils/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/spin/Dirac.cc ./GridInit.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc

View File

@ -11,7 +11,7 @@ namespace Grid {
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
#if 0
// Scalar peek, no indices
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
@ -88,6 +88,7 @@ template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::T
}
return ret;
}
// matrix
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
@ -119,6 +120,7 @@ template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::T
}}
return ret;
}
#endif
}

View File

@ -5,7 +5,7 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////////////
// Poke a specific index;
//////////////////////////////////////////////////////////////////////////////
#if 0
// Scalar poke
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
@ -18,7 +18,7 @@ template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::Te
{
ret._internal[i] = arg._internal;
}
// Vector poke, two indices
//Matrix poke, two indices
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
{
@ -31,7 +31,6 @@ template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::Te
// scalar
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
{
pokeIndex<Level>(ret._internal,arg._internal);
}
@ -95,7 +94,7 @@ template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::Te
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
}}
}
#endif
}
#endif

28
lib/PerfCount.cc Normal file
View File

@ -0,0 +1,28 @@
#include <Grid.h>
#include <PerfCount.h>
namespace Grid {
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." },
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." },
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES , "CACHE_REFERENCES..." },
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES , "CACHE_MISSES......." },
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS) , "L1D_READ_MISS......"},
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS) , "L1D_READ_ACCESS...."},
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS) , "L1D_WRITE_MISS....."},
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS) , "L1D_WRITE_ACCESS..."},
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS) , "L1D_PREFETCH_MISS.."},
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS"},
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS) , "LL_READ_MISS......."},
// { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS) , "LL_READ_ACCESS....."},
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS) , "LL_WRITE_MISS......"},
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS) , "LL_WRITE_ACCESS...."},
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS) , "LL_PREFETCH_MISS..."},
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS) , "LL_PREFETCH_ACCESS."},
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS) , "L1I_READ_MISS......"},
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS) , "L1I_READ_ACCESS...."}
// { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
};
}

157
lib/PerfCount.h Normal file
View File

@ -0,0 +1,157 @@
#ifndef GRID_PERFCOUNT_H
#define GRID_PERFCOUNT_H
#include <sys/time.h>
#include <ctime>
#include <chrono>
#include <string.h>
#include <sys/ioctl.h>
#ifdef __linux__
#include <syscall.h>
#include <linux/perf_event.h>
#endif
namespace Grid {
#ifdef __linux__
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags)
{
int ret=0;
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}
#endif
class PerformanceCounter {
private:
typedef struct {
public:
uint32_t type;
uint64_t config;
const char *name;
} PerformanceCounterConfig;
static const PerformanceCounterConfig PerformanceCounterConfigs [];
public:
enum PerformanceCounterType {
CPUCYCLES=0,
INSTRUCTIONS,
// STALL_CYCLES,
CACHE_REFERENCES,
CACHE_MISSES,
L1D_READ_MISS,
L1D_READ_ACCESS,
L1D_WRITE_MISS,
L1D_WRITE_ACCESS,
L1D_PREFETCH_MISS,
L1D_PREFETCH_ACCESS,
LL_READ_MISS,
// LL_READ_ACCESS,
LL_WRITE_MISS,
LL_WRITE_ACCESS,
LL_PREFETCH_MISS,
LL_PREFETCH_ACCESS,
L1I_READ_MISS,
L1I_READ_ACCESS,
PERFORMANCE_COUNTER_NUM_TYPES
};
public:
int PCT;
long long count;
int fd;
uint64_t elapsed;
uint64_t begin;
static int NumTypes(void){
return PERFORMANCE_COUNTER_NUM_TYPES;
}
PerformanceCounter(int _pct) {
#ifdef __linux__
assert(_pct>=0);
assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
fd=-1;
count=0;
PCT =_pct;
Open();
#endif
}
void Open(void)
{
#ifdef __linux__
struct perf_event_attr pe;
memset(&pe, 0, sizeof(struct perf_event_attr));
pe.size = sizeof(struct perf_event_attr);
pe.disabled = 1;
pe.exclude_kernel = 1;
pe.exclude_hv = 1;
pe.inherit = 1;
pe.type = PerformanceCounterConfigs[PCT].type;
pe.config= PerformanceCounterConfigs[PCT].config;
const char * name = PerformanceCounterConfigs[PCT].name;
fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
perror("Error is");
}
#endif
}
void Start(void)
{
#ifdef __linux__
if ( fd!= -1) {
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
}
begin =__rdtsc();
#else
begin = 0;
#endif
}
void Stop(void) {
count=0;
#ifdef __linux__
if ( fd!= -1) {
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
::read(fd, &count, sizeof(long long));
}
elapsed = __rdtsc() - begin;
#else
elapsed = 0;
#endif
}
void Report(void) {
#ifdef __linux__
printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count);
#else
printf("%llu cycles \n", elapsed );
#endif
}
~PerformanceCounter()
{
#ifdef __linux__
close(fd);
#endif
}
};
}
#endif
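Usage sketch (Linux only; on other platforms the counter is inert and reports zero):
// Illustrative: count L1 data-cache read misses around a kernel.
PerformanceCounter pc(PerformanceCounter::L1D_READ_MISS);
pc.Start();
// ... kernel under test ...
pc.Stop();
pc.Report(); // "<cycles> cycles L1D_READ_MISS...... = <count>"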

View File

@ -13,6 +13,11 @@
typedef uint32_t Integer;
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D))
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) ((A<<7)|(B<<6)|(C<<5)|(D<<4)|(E<<3)|(F<<2)|(G<<1)|(H))
#define _MM_SELECT_FOUR_TWO(A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)
#define _MM_SELECT_TWO_TWO(A,B) _MM_SELECT_FOUR_TWO(0,0,A,B)
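For example, _MM_SELECT_FOUR_FOUR(3,2,1,0) evaluates to (3<<6)|(2<<4)|(1<<2)|0 = 0xE4, the identity shuffle immediate for SSE/AVX permute intrinsics; an illustrative check:
static_assert(_MM_SELECT_FOUR_FOUR(3,2,1,0)==0xE4,"identity shuffle"); // illustrative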
namespace Grid {
typedef float RealF;

View File

@ -41,11 +41,21 @@
namespace Grid {
struct StencilEntry {
int _offset;
int _is_local;
int _permute;
int _around_the_world;
};
template<class vobj,class cobj, class compressor>
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
public:
typedef uint32_t StencilInteger;
typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type;
typedef typename cobj::scalar_object scalar_object;
int _checkerboard;
int _npoints; // Move to template param?
@ -58,35 +68,336 @@ namespace Grid {
std::vector<int> _permute_type;
// npoints x Osites() of these
std::vector<std::vector<int> > _offsets;
std::vector<std::vector<int> > _is_local;
std::vector<std::vector<int> > _permute;
std::vector<std::vector<StencilEntry> > _entries;
// Comms buffers
std::vector<std::vector<scalar_object> > send_buf_extract;
std::vector<std::vector<scalar_object> > recv_buf_extract;
std::vector<scalar_object *> pointers;
std::vector<scalar_object *> rpointers;
Vector<cobj> send_buf;
inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point][osite]; }
int _unified_buffer_size;
int _request_count;
double buftime;
double gathertime;
double commtime;
double commstime;
double halotime;
double scattertime;
double mergetime;
double gathermtime;
double splicetime;
double nosplicetime;
CartesianStencil(GridBase *grid,
int npoints,
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances);
CartesianStencil(GridBase *grid,
int npoints,
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances)
: _entries(npoints), _permute_type(npoints), _comm_buf_size(npoints)
{
gathertime=0;
commtime=0;
commstime=0;
halotime=0;
scattertime=0;
mergetime=0;
gathermtime=0;
buftime=0;
splicetime=0;
nosplicetime=0;
_npoints = npoints;
_grid = grid;
_directions = directions;
_distances = distances;
_unified_buffer_size=0;
_request_count =0;
int osites = _grid->oSites();
for(int i=0;i<npoints;i++){
int point = i;
_entries[i].resize( osites);
int dimension = directions[i];
int displacement = distances[i];
int shift = displacement;
int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension];
_permute_type[point]=_grid->PermuteType(dimension);
_checkerboard = checkerboard;
// the permute type
int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ;
int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim);
int sshift[2];
// Underlying approach. For each local site build
// up a table containing the npoint "neighbours" and whether they
// live in lattice or a comms buffer.
if ( !comm_dim ) {
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
Local(point,dimension,shift,0x3);
} else {
Local(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Local(point,dimension,shift,0x2);// both with block stride loop iteration
}
} else { // All permute extract done in comms phase prior to Stencil application
// So tables are the same whether comm_dim or splice_dim
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
Comms(point,dimension,shift,0x3);
// std::cout<<"Comms 0x3"<<std::endl;
} else {
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
}
}
// for(int ss=0;ss<osites;ss++){
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
// }
}
}
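Downstream kernels consume this table via GetEntry; a sketch of the intended access pattern, mirroring the CoarsenedMatrix changes later in this commit (the field `in`, result `chi` and buffer `comm_buf` are placeholder names):
int ptype;
StencilEntry *SE = Stencil.GetEntry(ptype,point,ss);
if ( SE->_is_local && SE->_permute ) permute(chi,in._odata[SE->_offset],ptype);
else if ( SE->_is_local )            chi = in._odata[SE->_offset];
else                                 chi = comm_buf[SE->_offset];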
void Local (int point, int dimension,int shiftpm,int cbmask)
{
int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
int gd = _grid->_gdimensions[dimension];
// Map to always positive shift modulo global full dimension.
int shift = (shiftpm+fd)%fd;
// the permute type
int permute_dim =_grid->PermuteDim(dimension);
for(int x=0;x<rd;x++){
int o = 0;
int bo = x * _grid->_ostride[dimension];
int cb= (cbmask==0x2)? Odd : Even;
int sshift = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
int sx = (x+sshift)%rd;
int wraparound=0;
if ( (shiftpm==-1) && (sx>x) ) {
wraparound = 1;
}
if ( (shiftpm== 1) && (sx<x) ) {
wraparound = 1;
}
int permute_slice=0;
if(permute_dim){
int wrap = sshift/rd;
int num = sshift%rd;
if ( x< rd-num ) permute_slice=wrap;
else permute_slice = 1-wrap;
}
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
}
}
void Comms (int point,int dimension,int shiftpm,int cbmask)
{
GridBase *grid=_grid;
int fd = _grid->_fdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
int rd = _grid->_rdimensions[dimension];
int pd = _grid->_processors[dimension];
int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ;
// assert(simd_layout==1); // Why?
assert(comm_dim==1);
int shift = (shiftpm + fd) %fd;
assert(shift>=0);
assert(shift<fd);
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
_comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
// send to one or more remote nodes.
int cb= (cbmask==0x2)? Odd : Even;
int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
int offnode = (comm_proc!= 0);
// std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
int wraparound=0;
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
wraparound = 1;
}
if ( (shiftpm== 1) && (sx<x) && (grid->_processor_coor[dimension]==grid->_processors[dimension]-1) ) {
wraparound = 1;
}
if (!offnode) {
int permute_slice=0;
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
} else {
int words = buffer_size;
if (cbmask != 0x3) words=words>>1;
// GatherPlaneSimple (point,dimension,sx,cbmask);
int rank = grid->_processor;
int recv_from_rank;
int xmit_to_rank;
int unified_buffer_offset = _unified_buffer_size;
_unified_buffer_size += words;
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
}
}
}
// Routine builds up the StencilEntry table (_offset, _is_local, _permute) for each site
void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap)
{
int rd = _grid->_rdimensions[dimension];
if ( !_grid->CheckerBoarded(dimension) ) {
int o = 0; // relative offset to base within plane
int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane
int lo = lplane*_grid->_ostride[dimension]; // offset in buffer
// Simple block stride gather of SIMD objects
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
_entries[point][lo+o+b]._offset =ro+o+b;
_entries[point][lo+o+b]._is_local=1;
_entries[point][lo+o+b]._permute=permute;
_entries[point][lo+o+b]._around_the_world=wrap;
}
o +=_grid->_slice_stride[dimension];
}
} else {
int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane
int lo = lplane*_grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
int ocb=1<<_grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
_entries[point][lo+o+b]._offset =ro+o+b;
_entries[point][lo+o+b]._is_local=1;
_entries[point][lo+o+b]._permute=permute;
_entries[point][lo+o+b]._around_the_world=wrap;
}
}
o +=_grid->_slice_stride[dimension];
}
}
}
// Routine builds up the StencilEntry table (_offset, _is_local, _permute) for each site
void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset, int wrap)
{
int rd = _grid->_rdimensions[dimension];
if ( !_grid->CheckerBoarded(dimension) ) {
int so = plane*_grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
// Simple block stride gather of SIMD objects
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
_entries[point][so+o+b]._offset =offset+(bo++);
_entries[point][so+o+b]._is_local=0;
_entries[point][so+o+b]._permute=0;
_entries[point][so+o+b]._around_the_world=wrap;
}
o +=_grid->_slice_stride[dimension];
}
} else {
int so = plane*_grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
int ocb=1<<_grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
_entries[point][so+o+b]._offset =offset+(bo++);
_entries[point][so+o+b]._is_local=0;
_entries[point][so+o+b]._permute =0;
_entries[point][so+o+b]._around_the_world=wrap;
}
}
o +=_grid->_slice_stride[dimension];
}
}
}
// CartesianStencil(GridBase *grid,
// int npoints,
// int checkerboard,
// const std::vector<int> &directions,
// const std::vector<int> &distances);
// Add to tables for various cases; is this mistaken? Only local if one proc in the dim.
// Can this be avoided with simpler coding of comms?
void Local (int point, int dimension,int shift,int cbmask);
void Comms (int point, int dimension,int shift,int cbmask);
void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute);
void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset);
// void Local (int point, int dimension,int shift,int cbmask);
// void Comms (int point, int dimension,int shift,int cbmask);
// void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap);
// void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset,int wrap);
// Could allow a functional munging of the halo to another type during the comms.
// this could implement the 16bit/32bit/64bit compression.
template<class vobj,class cobj, class compressor> void
HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
void HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
{
// conformable(source._grid,_grid);
assert(source._grid==_grid);
halotime-=usecond();
if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size);
int u_comm_offset=0;
@ -120,24 +431,33 @@ namespace Grid {
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
if (splice_dim) {
splicetime-=usecond();
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
splicetime+=usecond();
} else {
nosplicetime-=usecond();
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
nosplicetime+=usecond();
}
} else {
std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if(splice_dim){
splicetime-=usecond();
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
splicetime+=usecond();
} else {
nosplicetime-=usecond();
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
nosplicetime+=usecond();
}
}
}
}
halotime+=usecond();
}
template<class vobj,class cobj, class compressor>
void GatherStartComms(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
int &u_comm_offset,compressor & compress)
@ -158,28 +478,29 @@ namespace Grid {
assert(comm_dim==1);
assert(shift>=0);
assert(shift<fd);
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
if(send_buf.size()<buffer_size) send_buf.resize(buffer_size);
int cb= (cbmask==0x2)? Odd : Even;
int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
if (comm_proc) {
int words = send_buf.size();
int words = buffer_size;
if (cbmask != 0x3) words=words>>1;
int bytes = words * sizeof(cobj);
gathertime-=usecond();
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);
gathertime+=usecond();
int rank = _grid->_processor;
int recv_from_rank;
@ -189,31 +510,27 @@ namespace Grid {
assert (recv_from_rank != _grid->ThisRank());
// FIXME Implement asynchronous send & also avoid buffer copy
commtime-=usecond();
_grid->SendToRecvFrom((void *)&send_buf[0],
xmit_to_rank,
(void *)&recv_buf[0],
(void *)&u_comm_buf[u_comm_offset],
recv_from_rank,
bytes);
commtime+=usecond();
for(int i=0;i<buffer_size;i++){
u_comm_buf[u_comm_offset+i]=recv_buf[i];
}
u_comm_offset+=buffer_size;
u_comm_offset+=words;
}
}
}
template<class vobj,class cobj, class compressor>
void GatherStartCommsSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
int &u_comm_offset,compressor &compress)
{
buftime-=usecond();
const int Nsimd = _grid->Nsimd();
typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type;
typedef typename cobj::scalar_object scalar_object;
int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension];
@ -235,17 +552,23 @@ namespace Grid {
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int words = sizeof(cobj)/sizeof(vector_type);
/*
* possibly slow to allocate
* Doesn't matter in this test, but may want to preallocate in the
* dirac operators
*/
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
int bytes = buffer_size*sizeof(scalar_object);
assert(cbmask==0x3); // Fixme think there is a latent bug if not true
std::vector<scalar_object *> pointers(Nsimd); //
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
// Should grow to max size and then cost very little thereafter
send_buf_extract.resize(Nsimd);
recv_buf_extract.resize(Nsimd);
for(int l=0;l<Nsimd;l++){
if( send_buf_extract[l].size() < buffer_size) {
send_buf_extract[l].resize(buffer_size);
recv_buf_extract[l].resize(buffer_size);
}
}
pointers.resize(Nsimd);
rpointers.resize(Nsimd);
int bytes = buffer_size*sizeof(scalar_object);
buftime+=usecond();
///////////////////////////////////////////
// Work out what to send where
@ -266,7 +589,9 @@ namespace Grid {
}
int sx = (x+sshift)%rd;
gathermtime-=usecond();
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
gathermtime+=usecond();
for(int i=0;i<Nsimd;i++){
@ -293,11 +618,13 @@ namespace Grid {
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
commstime-=usecond();
_grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
xmit_to_rank,
(void *)&recv_buf_extract[i][0],
recv_from_rank,
bytes);
commstime+=usecond();
rpointers[i] = &recv_buf_extract[i][0];
@ -307,11 +634,13 @@ namespace Grid {
}
// Here we don't want to scatter, just place into a buffer.
mergetime-=usecond();
PARALLEL_FOR_LOOP
for(int i=0;i<buffer_size;i++){
assert(u_comm_offset+i<_unified_buffer_size);
// assert(u_comm_offset+i<_unified_buffer_size);
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
}
mergetime+=usecond();
u_comm_offset+=buffer_size;
}
}

View File

@ -8,11 +8,12 @@
#include <tensors/Tensor_outer.h>
#include <tensors/Tensor_transpose.h>
#include <tensors/Tensor_trace.h>
#include <tensors/Tensor_index.h>
#include <tensors/Tensor_Ta.h>
#include <tensors/Tensor_determinant.h>
#include <tensors/Tensor_exp.h>
#include <tensors/Tensor_peek.h>
#include <tensors/Tensor_poke.h>
//#include <tensors/Tensor_peek.h>
//#include <tensors/Tensor_poke.h>
#include <tensors/Tensor_reality.h>
#include <tensors/Tensor_unary.h>
#include <tensors/Tensor_extract_merge.h>

View File

@ -24,7 +24,16 @@ namespace Grid {
class GridThread {
public:
static int _threads;
static int _hyperthreads;
static int _cores;
static void SetCores(int cr) {
#ifdef GRID_OMP
_cores = cr;
#else
_cores = 1;
#endif
}
static void SetThreads(int thr) {
#ifdef GRID_OMP
_threads = MIN(thr,omp_get_max_threads()) ;
@ -35,22 +44,28 @@ class GridThread {
};
static void SetMaxThreads(void) {
#ifdef GRID_OMP
// setenv("KMP_AFFINITY","balanced",1);
_threads = omp_get_max_threads();
omp_set_num_threads(_threads);
#else
_threads = 1;
#endif
};
static int GetHyperThreads(void) { assert(_threads%_cores ==0); return _threads/_cores; };
static int GetCores(void) { return _cores; };
static int GetThreads(void) { return _threads; };
static int SumArraySize(void) {return _threads;};
static void GetWork(int nwork, int me, int & mywork, int & myoff){
int basework = nwork/_threads;
int backfill = _threads-(nwork%_threads);
if ( me >= _threads ) {
GetWork(nwork,me,mywork,myoff,_threads);
}
static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){
int basework = nwork/units;
int backfill = units-(nwork%units);
if ( me >= units ) {
mywork = myoff = 0;
} else {
mywork = (nwork+me)/_threads;
mywork = (nwork+me)/units;
myoff = basework * me;
if ( me > backfill )
myoff+= (me-backfill);

52
lib/Timer.h Normal file
View File

@ -0,0 +1,52 @@
#ifndef GRID_TIME_H
#define GRID_TIME_H
#include <sys/time.h>
#include <ctime>
#include <chrono>
namespace Grid {
// Dress the output; use std::chrono
// C++11 time facilities better?
double usecond(void);
typedef std::chrono::system_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::milliseconds GridTime;
class GridStopWatch {
private:
bool running;
GridTimePoint start;
GridTime accumulator;
public:
GridStopWatch () {
Reset();
}
void Start(void) {
assert(running == false);
start = GridClock::now();
running = true;
}
void Stop(void) {
assert(running == true);
accumulator+= std::chrono::duration_cast<GridTime>(GridClock::now()-start);
running = false;
};
void Reset(void){
running = false;
start = GridClock::now();
accumulator = std::chrono::duration_cast<GridTime>(start-start);
}
GridTime Elapsed(void) {
assert(running == false);
return accumulator;
}
};
}
#endif
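Usage sketch:
GridStopWatch sw;
sw.Start();
// ... timed region ...
sw.Stop();
std::cout << "elapsed " << sw.Elapsed() << std::endl; // operator<< prints "<n> ms"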

View File

@ -12,9 +12,6 @@ namespace Grid {
std::vector<int> directions ;
std::vector<int> displacements;
// FIXME -- don't like xposing the operator directions
// as different to the geometrical dirs
// Also don't like special casing five dim.. should pass an object in template
Geometry(int _d) {
int base = (_d==5) ? 1:0;
@ -35,12 +32,12 @@ namespace Grid {
displacements[2*_d]=0;
//// report back
std::cout<<"directions :";
std::cout<<GridLogMessage<<"directions :";
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
std::cout <<std::endl;
std::cout<<"displacements :";
std::cout<<GridLogMessage<<"displacements :";
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
std::cout <<std::endl;
std::cout<<std::endl;
}
/*
@ -64,6 +61,97 @@ namespace Grid {
};
template<class Fobj,class CComplex,int nbasis>
class Aggregation {
public:
typedef iVector<CComplex,nbasis > siteVector;
typedef Lattice<siteVector> CoarseVector;
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField;
GridBase *CoarseGrid;
GridBase *FineGrid;
std::vector<Lattice<Fobj> > subspace;
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :
CoarseGrid(_CoarseGrid),
FineGrid(_FineGrid),
subspace(nbasis,_FineGrid)
{
};
void Orthogonalise(void){
CoarseScalar InnerProd(CoarseGrid);
blockOrthogonalise(InnerProd,subspace);
}
void CheckOrthogonal(void){
CoarseVector iProj(CoarseGrid);
CoarseVector eProj(CoarseGrid);
Lattice<CComplex> pokey(CoarseGrid);
for(int i=0;i<nbasis;i++){
blockProject(iProj,subspace[i],subspace);
eProj=zero;
for(int ss=0;ss<CoarseGrid->oSites();ss++){
eProj._odata[ss](i)=CComplex(1.0);
}
eProj=eProj - iProj;
std::cout<<GridLogMessage<<"Orthog check error "<<i<<" " << norm2(eProj)<<std::endl;
}
std::cout<<GridLogMessage <<"CheckOrthog done"<<std::endl;
}
void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
blockProject(CoarseVec,FineVec,subspace);
}
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
blockPromote(CoarseVec,FineVec,subspace);
}
void CreateSubspaceRandom(GridParallelRNG &RNG){
for(int i=0;i<nbasis;i++){
random(RNG,subspace[i]);
std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
}
Orthogonalise();
}
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
RealD scale;
ConjugateGradient<FineField> CG(1.0e-2,10000);
FineField noise(FineGrid);
FineField Mn(FineGrid);
for(int b=0;b<nn;b++){
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
for(int i=0;i<1;i++){
CG(hermop,noise,subspace[b]);
noise = subspace[b];
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
}
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl;
subspace[b] = noise;
}
Orthogonalise();
}
};
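A setup sketch (the grid pointers, RNG and Hermitian operator are placeholder names):
// Illustrative: build and sanity-check an nbasis-dimensional near-null space.
Aggregation<Fobj,CComplex,nbasis> Aggregates(Coarse,Fine);
Aggregates.CreateSubspace(RNG,HermOp); // CG-filtered gaussian noise vectors
Aggregates.CheckOrthogonal();          // errors should be near machine zero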
// Fine Object == (per site) type of fine field
// nbasis == number of deflation vectors
template<class Fobj,class CComplex,int nbasis>
@ -82,7 +170,7 @@ namespace Grid {
////////////////////
Geometry geom;
GridBase * _grid;
CartesianStencil Stencil;
CartesianStencil<siteVector,siteVector,SimpleCompressor<siteVector> > Stencil;
std::vector<CoarseMatrix> A;
@ -101,24 +189,22 @@ namespace Grid {
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,comm_buf,compressor);
//PARALLEL_FOR_LOOP
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int offset,local,perm,ptype;
int ptype;
StencilEntry *SE;
for(int point=0;point<geom.npoint;point++){
offset = Stencil._offsets [point][ss];
local = Stencil._is_local[point][ss];
perm = Stencil._permute [point][ss];
ptype = Stencil._permute_type[point];
SE=Stencil.GetEntry(ptype,point,ss);
if(local&&perm) {
permute(nbr,in._odata[offset],ptype);
} else if(local) {
nbr = in._odata[offset];
if(SE->_is_local&&SE->_permute) {
permute(nbr,in._odata[SE->_offset],ptype);
} else if(SE->_is_local) {
nbr = in._odata[SE->_offset];
} else {
nbr = comm_buf[offset];
nbr = comm_buf[SE->_offset];
}
res = res + A[point]._odata[ss]*nbr;
}
@ -145,7 +231,8 @@ namespace Grid {
comm_buf.resize(Stencil._unified_buffer_size);
};
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,std::vector<Lattice<Fobj> > & subspace){
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
Aggregation<Fobj,CComplex,nbasis> & Subspace){
FineField iblock(FineGrid); // contributions from within this block
FineField oblock(FineGrid); // contributions from outwith this block
@ -162,8 +249,7 @@ namespace Grid {
CoarseScalar InnerProd(Grid());
// Orthogonalise the subblocks over the basis
blockOrthogonalise(InnerProd,subspace);
blockProject(iProj,subspace[0],subspace);
blockOrthogonalise(InnerProd,Subspace.subspace);
// Compute the matrix elements of linop between this orthonormal
// set of vectors.
@ -177,7 +263,10 @@ namespace Grid {
assert(self_stencil!=-1);
for(int i=0;i<nbasis;i++){
phi=subspace[i];
phi=Subspace.subspace[i];
std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
for(int p=0;p<geom.npoint;p++){
int dir = geom.directions[p];
@ -210,8 +299,11 @@ namespace Grid {
assert(0);
}
blockProject(iProj,iblock,subspace);
blockProject(oProj,oblock,subspace);
Subspace.ProjectToSubspace(iProj,iblock);
Subspace.ProjectToSubspace(oProj,oblock);
// blockProject(iProj,iblock,Subspace.subspace);
// blockProject(oProj,oblock,Subspace.subspace);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
for(int j=0;j<nbasis;j++){
if( disp!= 0 ) {
@ -227,33 +319,33 @@ namespace Grid {
///////////////////////////
// test code worth preserving in if block
///////////////////////////
std::cout<< " Computed matrix elements "<< self_stencil <<std::endl;
std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
for(int p=0;p<geom.npoint;p++){
std::cout<< "A["<<p<<"]" << std::endl;
std::cout<< A[p] << std::endl;
std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
std::cout<<GridLogMessage<< A[p] << std::endl;
}
std::cout<< " picking by block0 "<< self_stencil <<std::endl;
std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
phi=subspace[0];
phi=Subspace.subspace[0];
std::vector<int> bc(FineGrid->_ndimension,0);
blockPick(Grid(),phi,tmp,bc); // Pick out a block
linop.Op(tmp,Mphi); // Apply big dop
blockProject(iProj,Mphi,subspace); // project it and print it
std::cout<< " Computed matrix elements from block zero only "<<std::endl;
std::cout<< iProj <<std::endl;
std::cout<<"Computed Coarse Operator"<<std::endl;
blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
std::cout<<GridLogMessage<< iProj <<std::endl;
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif
// AssertHermitian();
// ForceHermitian();
// ForceDiagonal();
AssertHermitian();
// ForceDiagonal();
}
void ForceDiagonal(void) {
std::cout<<"**************************************************"<<std::endl;
std::cout<<"**** Forcing coarse operator to be diagonal ****"<<std::endl;
std::cout<<"**************************************************"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
std::cout<<GridLogMessage<<"**** Forcing coarse operator to be diagonal ****"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
for(int p=0;p<8;p++){
A[p]=zero;
}
@ -263,7 +355,7 @@ namespace Grid {
Complex one(1.0);
iMatrix<Complex,nbasis> ident; ident=one;
iMatrix<CComplex,nbasis> ident; ident=one;
val = val*adj(val);
val = val + 1.0;
@ -279,7 +371,7 @@ namespace Grid {
int dd=d+1;
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
}
A[8] = 0.5*(A[8] + adj(A[8]));
// A[8] = 0.5*(A[8] + adj(A[8]));
}
void AssertHermitian(void) {
CoarseMatrix AA (Grid());
@ -293,13 +385,13 @@ namespace Grid {
Diff = AA - adj(AAc);
std::cout<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
std::cout<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
std::cout<<GridLogMessage<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
std::cout<<GridLogMessage<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
}
Diff = A[8] - adj(A[8]);
std::cout<<"Norm diff local "<< norm2(Diff)<<std::endl;
std::cout<<"Norm local "<< norm2(A[8])<<std::endl;
std::cout<<GridLogMessage<<"Norm diff local "<< norm2(Diff)<<std::endl;
std::cout<<GridLogMessage<<"Norm local "<< norm2(A[8])<<std::endl;
}
};

View File

@ -71,6 +71,47 @@ namespace Grid {
}
};
////////////////////////////////////////////////////////////////////
// Construct herm op and shift it for mgrid smoother
////////////////////////////////////////////////////////////////////
template<class Matrix,class Field>
class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
Matrix &_Mat;
RealD _shift;
public:
ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {
_Mat.Mdiag(in,out);
assert(0);
}
void OpDir (const Field &in, Field &out,int dir,int disp) {
_Mat.Mdir(in,out,dir,disp);
assert(0);
}
void Op (const Field &in, Field &out){
_Mat.M(in,out);
assert(0);
}
void AdjOp (const Field &in, Field &out){
_Mat.Mdag(in,out);
assert(0);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
_Mat.MdagM(in,out,n1,n2);
out = out + _shift*in;
ComplexD dot;
dot= innerProduct(in,out);
n1=real(dot);
n2=norm2(out);
}
void HermOp(const Field &in, Field &out){
RealD n1,n2;
HermOpAndNorm(in,out,n1,n2);
}
};
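For example (matrix and field types are placeholders), a shifted operator for use as a multigrid smoother might be formed as:
// Illustrative: SmootherOp's HermOp now acts as MdagM + 2.0.
ShiftedMdagMLinearOperator<DiracMatrix,FermionField> SmootherOp(Ddwf,2.0);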
////////////////////////////////////////////////////////////////////
// Wrap an already herm matrix
////////////////////////////////////////////////////////////////////
@ -147,6 +188,7 @@ namespace Grid {
};
template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
public:
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
@ -173,6 +215,7 @@ namespace Grid {
};
template<class Matrix,class Field>
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
public:
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
@ -199,6 +242,7 @@ namespace Grid {
}
};
/////////////////////////////////////////////////////////////
// Base classes for functions of operators
/////////////////////////////////////////////////////////////
@ -207,6 +251,11 @@ namespace Grid {
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
};
template<class Field> class LinearFunction {
public:
virtual void operator() (const Field &in, Field &out) = 0;
};
/////////////////////////////////////////////////////////////
// Base classes for Multishift solvers for operators
/////////////////////////////////////////////////////////////

View File

@ -0,0 +1,19 @@
#ifndef GRID_PRECONDITIONER_H
#define GRID_PRECONDITIONER_H
namespace Grid {
template<class Field> class Preconditioner : public LinearFunction<Field> {
public:
virtual void operator()(const Field &src, Field & psi)=0;
};
template<class Field> class TrivialPrecon : public Preconditioner<Field> {
public:
void operator()(const Field &src, Field & psi){
psi = src;
}
TrivialPrecon(void){};
};
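// Sketch of a non-trivial preconditioner (everything beyond this header is
// an assumption): any class implementing LinearFunction<Field> can be handed
// to the preconditioned solvers, e.g. a fixed-order Chebyshev smoother.
//
//   template<class Field> class ChebyPrecon : public Preconditioner<Field> {
//     Chebyshev<Field> Cheby;
//     LinearOperatorBase<Field> &Op;
//   public:
//     ChebyPrecon(LinearOperatorBase<Field> &O,RealD lo,RealD hi,int order,RealD (*f)(RealD))
//       : Cheby(lo,hi,order,f), Op(O) {};
//     void operator()(const Field &src,Field &psi){ Cheby(Op,src,psi); }
//   };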
}
#endif

View File

@ -9,23 +9,34 @@ namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////
// Simple general polynomial with user supplied coefficients
////////////////////////////////////////////////////////////////////////////////////////////
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out);
};
};
template<class Field>
class Polynomial : public OperatorFunction<Field> {
private:
std::vector<double> Coeffs;
std::vector<RealD> Coeffs;
public:
Polynomial(std::vector<double> &_Coeffs) : Coeffs(_Coeffs) {};
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Field AtoN = in;
Field AtoN(in._grid);
Field Mtmp(in._grid);
AtoN = in;
out = AtoN*Coeffs[0];
// std::cout <<"Poly in " <<norm2(in)<<std::endl;
// std::cout <<"0 " <<norm2(out)<<std::endl;
for(int n=1;n<Coeffs.size();n++){
Field Mtmp=AtoN;
Linop.Op(Mtmp,AtoN);
Mtmp = AtoN;
Linop.HermOp(Mtmp,AtoN);
out=out+AtoN*Coeffs[n];
// std::cout << n<<" " <<norm2(out)<<std::endl;
}
};
};
@ -36,21 +47,36 @@ namespace Grid {
template<class Field>
class Chebyshev : public OperatorFunction<Field> {
private:
std::vector<double> Coeffs;
std::vector<RealD> Coeffs;
int order;
double hi;
double lo;
RealD hi;
RealD lo;
public:
void csv(std::ostream &out){
for (double x=lo; x<hi; x+=(hi-lo)/1000) {
double f = approx(x);
for (RealD x=lo; x<hi; x+=(hi-lo)/1000) {
RealD f = approx(x);
out<< x<<" "<<f<<std::endl;
}
return;
}
Chebyshev(double _lo,double _hi,int _order, double (* func)(double) ){
// Convenience for plotting the approximation
void PlotApprox(std::ostream &out) {
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
for(RealD x=lo;x<hi;x+=(hi-lo)/50.0){
out <<x<<"\t"<<approx(x)<<std::endl;
}
};
Chebyshev(){};
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
////////////////////////////////////////////////////////////////////////////////////////////////////
// c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
////////////////////////////////////////////////////////////////////////////////////////////////////
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
{
lo=_lo;
hi=_hi;
order=_order;
@ -58,29 +84,58 @@ namespace Grid {
if(order < 2) exit(-1);
Coeffs.resize(order);
for(int j=0;j<order;j++){
double s=0;
RealD s=0;
for(int k=0;k<order;k++){
double y=std::cos(M_PI*(k+0.5)/order);
double x=0.5*(y*(hi-lo)+(hi+lo));
double f=func(x);
RealD y=std::cos(M_PI*(k+0.5)/order);
RealD x=0.5*(y*(hi-lo)+(hi+lo));
RealD f=func(x);
s=s+f*std::cos( j*M_PI*(k+0.5)/order );
}
Coeffs[j] = s * 2.0/order;
}
};
double approx(double x) // Convenience for plotting the approximation
void JacksonSmooth(void){
RealD M=order;
RealD alpha = M_PI/(M+2);
RealD lmax = std::cos(alpha);
RealD sumUsq =0;
std::vector<RealD> U(order+1);
std::vector<RealD> a(order+1);
std::vector<RealD> g(order+1);
for(int n=0;n<=M;n++){
U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax));
sumUsq += U[n]*U[n];
}
sumUsq = std::sqrt(sumUsq);
for(int i=0;i<=M;i++){
a[i] = U[i]/sumUsq;
}
g[0] = 1.0;
for(int m=1;m<=M;m++){
g[m] = 0;
for(int i=0;i<=M-m;i++){
g[m]+= a[i]*a[m+i];
}
}
for(int m=1;m<order;m++){
Coeffs[m]*=g[m];
}
}
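// A sketch of the damping applied above (standard Jackson kernel form): with
// a_i = U_i / sqrt(sum_j U_j^2), each Chebyshev coefficient c_m is scaled by
//   g_m = sum_{i=0}^{M-m} a_i a_{m+i}
// which suppresses the Gibbs oscillations near the ends of [lo,hi].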
RealD approx(RealD x) // Convenience for plotting the approximation
{
double Tn;
double Tnm;
double Tnp;
RealD Tn;
RealD Tnm;
RealD Tnp;
double y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
double T0=1;
double T1=y;
RealD T0=1;
RealD T1=y;
double sum;
RealD sum;
sum = 0.5*Coeffs[0]*T0;
sum+= Coeffs[1]*T1;
@ -95,46 +150,38 @@ namespace Grid {
return sum;
};
// Convenience for plotting the approximation
void PlotApprox(std::ostream &out) {
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
for(double x=lo;x<hi;x+=(hi-lo)/50.0){
out <<x<<"\t"<<approx(x)<<std::endl;
}
};
// Implement the required interface; could require Lattice base class
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Field T0 = in;
Field T1 = T0; // Field T1(T0._grid); more efficient but hardwires Lattice class
Field T2 = T1;
GridBase *grid=in._grid;
int vol=grid->gSites();
Field T0(grid); T0 = in;
Field T1(grid);
Field T2(grid);
Field y(grid);
// use a pointer trick to eliminate copies
Field *Tnm = &T0;
Field *Tn = &T1;
Field *Tnp = &T2;
Field y = in;
double xscale = 2.0/(hi-lo);
double mscale = -(hi+lo)/(hi-lo);
// Tn=T1 = (xscale M + mscale)in
Linop.Op(T0,y);
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
Linop.HermOp(T0,y);
T1=y*xscale+in*mscale;
// sum = .5 c[0] T0 + c[1] T1
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
for(int n=2;n<order;n++){
Linop.Op(*Tn,y);
Linop.HermOp(*Tn,y);
y=xscale*y+mscale*(*Tn);
*Tnp=2.0*y-(*Tnm);
out=out+Coeffs[n]* (*Tnp);
// Cycle pointers to avoid copies
@ -148,5 +195,121 @@ namespace Grid {
};
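// Usage sketch (the approximated function and operator names are
// assumptions):
//
//   RealD InverseApproximation(RealD x){ return 1.0/x; } // illustrative
//   Chebyshev<LatticeFermion> Cheby(0.1,64.0,32,InverseApproximation);
//   Cheby(HermOp,src,sol); // sol ~ HermOp^{-1} src for spectrum in [0.1,64.0]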
template<class Field>
class ChebyshevLanczos : public Chebyshev<Field> {
private:
std::vector<RealD> Coeffs;
int order;
RealD alpha;
RealD beta;
RealD mu;
public:
ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) :
alpha(_alpha),
beta(_beta),
mu(_mu)
{
order=_order;
Coeffs.resize(order);
for(int i=0;i<_order;i++){
Coeffs[i] = 0.0;
}
Coeffs[order-1]=1.0;
};
void csv(std::ostream &out){
for (RealD x=-1.2*alpha; x<1.2*alpha; x+=(2.0*alpha)/10000) {
RealD f = approx(x);
out<< x<<" "<<f<<std::endl;
}
return;
}
RealD approx(RealD xx) // Convenience for plotting the approximation
{
RealD Tn;
RealD Tnm;
RealD Tnp;
Real aa = alpha * alpha;
Real bb = beta * beta;
RealD x = ( 2.0 * (xx-mu)*(xx-mu) - (aa+bb) ) / (aa-bb);
RealD y= x;
RealD T0=1;
RealD T1=y;
RealD sum;
sum = 0.5*Coeffs[0]*T0;
sum+= Coeffs[1]*T1;
Tn =T1;
Tnm=T0;
for(int i=2;i<order;i++){
Tnp=2*y*Tn-Tnm;
Tnm=Tn;
Tn =Tnp;
sum+= Tn*Coeffs[i];
}
return sum;
};
// shift_Multiply in Rudy's code
void AminusMuSq(LinearOperatorBase<Field> &Linop, const Field &in, Field &out)
{
GridBase *grid=in._grid;
Field tmp(grid);
RealD aa= alpha*alpha;
RealD bb= beta * beta;
Linop.HermOp(in,out);
out = out - mu*in;
Linop.HermOp(out,tmp);
tmp = tmp - mu * out;
out = (2.0/ (aa-bb) ) * tmp - ((aa+bb)/(aa-bb))*in;
};
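// i.e. out = [ 2 (A-mu)^2 - (alpha^2+beta^2) ] / (alpha^2-beta^2) in, which
// maps modes with |lambda-mu| in [beta,alpha] onto [-1,1]; the lone
// T_{order-1} set up in the constructor then grows rapidly only on modes
// with |lambda-mu| < beta, i.e. those a Lanczos filter wants enhanced.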
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
GridBase *grid=in._grid;
int vol=grid->gSites();
Field T0(grid); T0 = in;
Field T1(grid);
Field T2(grid);
Field y(grid);
Field *Tnm = &T0;
Field *Tn = &T1;
Field *Tnp = &T2;
// Tn=T1 = (xscale M )*in
AminusMuSq(Linop,T0,T1);
// sum = .5 c[0] T0 + c[1] T1
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
for(int n=2;n<order;n++){
AminusMuSq(Linop,*Tn,y);
*Tnp=2.0*y-(*Tnm);
out=out+Coeffs[n]* (*Tnp);
// Cycle pointers to avoid copies
Field *swizzle = Tnm;
Tnm =Tn;
Tn =Tnp;
Tnp =swizzle;
}
}
};
}
#endif

View File

@ -1,6 +1,8 @@
#ifndef MULTI_SHIFT_FUNCTION
#define MULTI_SHIFT_FUNCTION
namespace Grid {
class MultiShiftFunction {
public:
int order;
@ -9,20 +11,29 @@ public:
std::vector<RealD> tolerances;
RealD norm;
RealD lo,hi;
MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;};
RealD approx(RealD x);
void csv(std::ostream &out);
void gnuplot(std::ostream &out);
MultiShiftFunction(AlgRemez & remez,double tol,bool inverse) :
order(remez.getDegree()),
tolerances(remez.getDegree(),tol),
poles(remez.getDegree()),
residues(remez.getDegree())
void Init(AlgRemez & remez,double tol,bool inverse)
{
order=remez.getDegree();
tolerances.resize(remez.getDegree(),tol);
poles.resize(remez.getDegree());
residues.resize(remez.getDegree());
remez.getBounds(lo,hi);
if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm);
else remez.getPFE (&residues[0],&poles[0],&norm);
else remez.getPFE (&residues[0],&poles[0],&norm);
}
// Allow deferred initialisation
MultiShiftFunction(void){};
MultiShiftFunction(AlgRemez & remez,double tol,bool inverse)
{
Init(remez,tol,inverse);
}
};
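// Usage sketch (degree, bounds and tolerance are illustrative; the AlgRemez
// calls are the ones Init relies on):
//
//   AlgRemez remez(1.0e-4,64.0,precision);
//   remez.generateApprox(12,1,2);                   // rational approx to x^{+/-1/2}
//   MultiShiftFunction InvSqrt(remez,1.0e-10,true); // inverse => getIPFE poles/residues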
}
#endif

View File

@ -758,3 +758,4 @@ void AlgRemez::csv(std::ostream & os)
}
return;
}

View File

@ -15,7 +15,10 @@
#ifndef INCLUDED_ALG_REMEZ_H
#define INCLUDED_ALG_REMEZ_H
#include <algorithms/approx/bigfloat.h>
#include <stddef.h>
//#include <algorithms/approx/bigfloat.h>
#include <algorithms/approx/bigfloat_double.h>
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
#define SUM_MAX 10 // Maximum number of terms in exponential
@ -28,6 +31,7 @@
remez.getIPFE(res,pole,&norm);
remez.csv(ostream &os);
*/
class AlgRemez
{
private:

View File

@ -0,0 +1,370 @@
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
/*
* Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv.
* Script A = SolverMatrix
* Script P = Preconditioner
*
* Deflation methods considered
* -- Solve P A x = P b [ like Luscher ]
* DEF-1 M P A x = M P b [i.e. left precon]
* DEF-2 P^T M A x = P^T M b
* ADEF-1 Preconditioner = M P + Q [ = Q + M - M A Q ]
* ADEF-2 Preconditioner = P^T M + Q
* BNN Preconditioner = P^T M P + Q
* BNN2 Preconditioner = M P + P^TM +Q - M P A M
*
* Implement ADEF-2
*
* Vstart = P^Tx + Qb
* M1 = P^TM + Q
* M2=M3=1
* Vout = x
*/
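/*
 * Notation sketch (following Tang et al. 2009; W below is an assumed
 * coarse-space basis): Q = W (W^T A W)^{-1} W^T is the coarse solve and
 * P = 1 - A Q, so the ADEF-2 choice M1 = P^T M + Q smooths with M on the
 * complement of the coarse space and solves exactly within it.
 */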
// abstract base
template<class Field, class CoarseField>
class TwoLevelFlexiblePcg : public LinearFunction<Field>
{
public:
int verbose;
RealD Tolerance;
Integer MaxIterations;
const int mmax = 5;
GridBase *grid;
GridBase *coarsegrid;
LinearOperatorBase<Field>   *_Linop;
OperatorFunction<Field>     *_Smoother;
LinearFunction<CoarseField> *_CoarseSolver;
// Need something that knows how to get from Coarse to fine and back again,
// as most operator functions do
TwoLevelFlexiblePcg(RealD tol,
Integer maxit,
LinearOperatorBase<Field> *Linop,
OperatorFunction<Field> *Smoother,
LinearFunction<CoarseField> *CoarseSolver
) :
Tolerance(tol),
MaxIterations(maxit),
_Linop(Linop),
_Smoother(Smoother),
_CoarseSolver(CoarseSolver)
{
verbose=0;
};
// The Pcg routine is common to all, but the various matrices differ from derived
// implementation to derived implementation
void operator() (const Field &src, Field &psi){
psi.checkerboard = src.checkerboard;
grid = src._grid;
RealD f;
RealD rtzp,rtz,a,d,b;
RealD rptzp;
RealD tn;
RealD guess = norm2(psi);
RealD ssq = norm2(src);
RealD rsq = ssq*Tolerance*Tolerance;
/////////////////////////////
// Set up history vectors
/////////////////////////////
std::vector<Field> p (mmax,grid);
std::vector<Field> mmp(mmax,grid);
std::vector<RealD> pAp(mmax);
Field x (grid); x = psi;
Field z (grid);
Field tmp(grid);
Field r (grid);
Field mu (grid);
//////////////////////////
// x0 = Vstart -- possibly modify guess
//////////////////////////
x=src;
Vstart(x,src);
// r0 = b -A x0
HermOp(x,mmp[0]); // Shouldn't this be something else?
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
//////////////////////////////////
// Compute z = M1 x
//////////////////////////////////
M1(r,z);
rtzp =real(innerProduct(r,z));
///////////////////////////////////////
// Solve for Mss mu = P A z and set p = z-mu
// Def2: p = 1 - Q Az = Pright z
// Other algos M2 is trivial
///////////////////////////////////////
M2(z,p[0]);
for (int k=0;k<=MaxIterations;k++){
int peri_k = k % mmax;
int peri_kp = (k+1) % mmax;
rtz=rtzp;
d= M3(p[peri_k],mmp[peri_k]);
a = rtz/d;
// Memorise this
pAp[peri_k] = d;
axpy(x,a,p[peri_k],x);
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
// Compute z = M x
M1(r,z);
rtzp =real(innerProduct(r,z));
M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
// Standard search direction p -> z + b p ; b = rtzp/rtz
b = (rtzp)/rtz;
axpy(p[peri_kp],b,p[peri_k],mu);
int northog;
// northog = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
for(int back=0; back < northog; back++){
int peri_back = (k-back)%mmax;
RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
RealD beta = -pbApk/pAp[peri_back];
axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
}
RealD rrn=sqrt(rn/ssq);
std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl;
// Stopping condition
if ( rn <= rsq ) {
HermOp(x,mmp[0]); // Shouldn't this be something else?
axpy(tmp,-1.0,src,mmp[0]);
RealD psinorm = sqrt(norm2(x));
RealD srcnorm = sqrt(norm2(src));
RealD tmpnorm = sqrt(norm2(tmp));
RealD true_residual = tmpnorm/srcnorm;
std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
return;
}
}
// Non-convergence
assert(0);
}
public:
virtual void M(Field & in,Field & out,Field & tmp) {
}
virtual void M1(Field & in, Field & out) {// the smoother
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
Field tmp(grid);
Field Min(grid);
PcgM(in,Min); // Smoother call
HermOp(Min,out);
axpy(tmp,-1.0,out,in); // tmp = in - A Min
ProjectToSubspace(tmp,PleftProj);
ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
axpy(out,1.0,Min,tmp); // Min+tmp
}
virtual void M2(const Field & in, Field & out) {
out=in;
// Must override for Def2 only
// case PcgDef2:
// Pright(in,out);
// break;
}
virtual RealD M3(const Field & p, Field & mmp){
double d,dd;
HermOpAndNorm(p,mmp,d,dd);
return dd;
// Must override for Def1 only
// case PcgDef1:
// d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
// linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
// Pleft(mp,mmp);
// d=real(linop_d->inner(p,mmp));
}
virtual void VstartDef2(Field & x,const Field & src){
//case PcgDef2:
//case PcgAdef2:
//case PcgAdef2f:
//case PcgV11f:
///////////////////////////////////
// Choose x_0 such that
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
// = [1 - Ass_inv A] Guess + Assinv src
// = P^T guess + Assinv src
// = Vstart [Tang notation]
// This gives:
// W^T (src - A x_0) = src_s - A guess_s - r_s
// = src_s - (A guess)_s - src_s + (A guess)_s
// = 0
///////////////////////////////////
Field r(grid);
Field mmp(grid);
HermOp(x,mmp);
axpy (r, -1.0, mmp, src); // r_{-1} = src - A x
ProjectToSubspace(r,PleftProj);
ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
PromoteFromSubspace(PleftMss_proj,mmp);
x=x+mmp;
}
virtual void Vstart(Field & x,const Field & src){
return;
}
/////////////////////////////////////////////////////////////////////
// Only Def1 has non-trivial Vout. Override in Def1
/////////////////////////////////////////////////////////////////////
virtual void Vout (Field & in, Field & out,Field & src){
out = in;
//case PcgDef1:
// //Qb + PT x
// ProjectToSubspace(src,PleftProj);
// ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
// PromoteFromSubspace(PleftMss_proj,tmp);
//
// Pright(in,out);
//
// linop_d->axpy(out,tmp,out,1.0);
// break;
}
////////////////////////////////////////////////////////////////////////////////////////////////
// Pright and Pleft are common to all implementations
////////////////////////////////////////////////////////////////////////////////////////////////
virtual void Pright(Field & in,Field & out){
// P_R = [ 1 0 ]
// [ -Mss^-1 Msb 0 ]
Field in_sbar(grid);
ProjectToSubspace(in,PleftProj);
PromoteFromSubspace(PleftProj,out);
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
HermOp(in_sbar,out);
ProjectToSubspace(out,PleftProj); // Mssbar in_sbar (project)
ApplyInverse (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
PromoteFromSubspace(PleftMss_proj,out); //
axpy(out,-1.0,out,in_sbar); // in_sbar - Mss^{-1} Mssbar in_sbar
}
virtual void Pleft (Field & in,Field & out){
// P_L = [ 1 -Mbs Mss^-1]
// [ 0 0 ]
Field in_sbar(grid);
Field tmp2(grid);
Field Mtmp(grid);
ProjectToSubspace(in,PleftProj);
PromoteFromSubspace(PleftProj,out);
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
PromoteFromSubspace(PleftMss_proj,out);
HermOp(out,Mtmp);
ProjectToSubspace(Mtmp,PleftProj); // Msbar s Mss^{-1}
PromoteFromSubspace(PleftProj,tmp2);
axpy(out,-1.0,tmp2,Mtmp);
axpy(out,-1.0,out,in_sbar); // in_sbar - Msbars Mss^{-1} in_s
}
};
template<class Field>
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp){
}
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
}
virtual void M2(Field & in, Field & out){
}
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
}
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
}
};
/*
template<class Field>
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
*/
#endif

View File

@ -13,9 +13,7 @@ namespace Grid {
public:
RealD Tolerance;
Integer MaxIterations;
int verbose;
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
verbose=1;
};
@ -42,14 +40,12 @@ public:
cp =a;
ssq=norm2(src);
if ( verbose ) {
std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
}
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
RealD rsq = Tolerance* Tolerance*ssq;
@ -58,7 +54,7 @@ public:
return;
}
std::cout << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
int k;
for (k=1;k<=MaxIterations;k++){
@ -69,23 +65,19 @@ public:
RealD qqck = norm2(mmp);
ComplexD dck = innerProduct(p,mmp);
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<" qqcheck "<< qqck<< " dck "<< dck<<std::endl;
a = c/d;
b_pred = a*(a*qq-d)/c;
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c;
// std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,b "<<cp<< " "<<b <<std::endl;
// Fuse these loops ; should be really easy
psi= a*p+psi;
p = p*b+r;
if (verbose) std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
// Stopping condition
if ( cp <= rsq ) {
@ -98,13 +90,14 @@ public:
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm/srcnorm;
std::cout<<"ConjugateGradient: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
std::cout<<"ConjugateGradient: true residual is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl;
std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl;
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
<<" computed residual "<<sqrt(cp/ssq)
<<" true residual "<<true_residual
<<" target "<<Tolerance<<std::endl;
return;
}
}
std::cout<<"ConjugateGradient did NOT converge"<<std::endl;
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
assert(0);
}
};

View File

@ -27,10 +27,14 @@ public:
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
{
GridBase *grid = src._grid;
int nshift = shifts.order;
std::vector<Field> results(nshift,grid);
(*this)(Linop,src,results,psi);
}
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
{
int nshift = shifts.order;
(*this)(Linop,src,results);
@ -91,7 +95,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
cp = norm2(src);
for(int s=0;s<nshift;s++){
rsq[s] = cp * mresidual[s] * mresidual[s];
std::cout<<"ConjugateGradientMultiShift: shift "<<s
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
<<" target resid "<<rsq[s]<<std::endl;
ps[s] = src;
}
@ -109,7 +113,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
// p and mmp is equal to d after this since
// the d computation is tricky
// qq = real(innerProduct(p,mmp));
// std::cout << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
b = -cp /d;
@ -214,7 +218,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
if(css<rsq[s]){
if ( ! converged[s] )
std::cout<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
converged[s]=1;
} else {
all_converged=0;
@ -225,8 +229,8 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
if ( all_converged ){
std::cout<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
std::cout<< "CGMultiShift: Checking solutions"<<std::endl;
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
// Check answers
for(int s=0; s < nshift; s++) {
@ -235,13 +239,13 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
axpy(r,-alpha[s],src,tmp);
RealD rn = norm2(r);
RealD cn = norm2(src);
std::cout<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
}
return;
}
}
// ugly hack
std::cout<<"CG multi shift did not converge"<<std::endl;
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
assert(0);
}

View File

@ -16,7 +16,7 @@ namespace Grid {
int verbose;
ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
verbose=1;
verbose=0;
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
@ -37,14 +37,11 @@ namespace Grid {
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
std::cout << "pAp, pAAp"<< pAp<<" "<<pAAp<<std::endl;
std::cout << "rAr, rAAr"<< rAr<<" "<<rAAr<<std::endl;
cp =norm2(r);
ssq=norm2(src);
rsq=Tolerance*Tolerance*ssq;
std::cout<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
if (verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
for(int k=1;k<MaxIterations;k++){
@ -62,22 +59,23 @@ namespace Grid {
axpy(p,b,p,r);
pAAp=axpy_norm(Ap,b,Ap,Ar);
std::cout<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
if(verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
if(cp<rsq) {
Linop.HermOp(psi,Ap);
axpy(r,-1.0,src,Ap);
RealD true_resid = norm2(r);
std::cout<<"ConjugateResidual: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
std::cout<<"ConjugateResidual: true residual is "<<true_resid<<std::endl;
std::cout<<"ConjugateResidual: target residual was "<<Tolerance <<std::endl;
RealD true_resid = norm2(r)/ssq;
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
<< " computed residual "<<sqrt(cp/ssq)
<< " true residual "<<sqrt(true_resid)
<< " target " <<Tolerance <<std::endl;
return;
}
}
std::cout<<"ConjugateResidual did NOT converge"<<std::endl;
std::cout<<GridLogMessage<<"ConjugateResidual did NOT converge"<<std::endl;
assert(0);
}
};

View File

@ -0,0 +1,109 @@
#ifndef GRID_DENSE_MATRIX_H
#define GRID_DENSE_MATRIX_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Matrix utils
/////////////////////////////////////////////////////////////
template<class T> using DenseVector = std::vector<T>;
template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
template<class T> void Size(DenseVector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void Resize(DenseVector<T > & mat, int N) {
mat.resize(N);
}
template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Fill(DenseMatrix<T> & mat, const T &val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
int N,M;
Size(mat,N,M);
DenseMatrix<T> C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set DenseMatrix to unit matrix **/
template<class T> void Unity(DenseMatrix<T> &A){
int N; SizeSquare(A,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) A[i][j] = 1;
else A[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(DenseMatrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
template<class T>
DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
int dim; SizeSquare(mat,dim);
DenseMatrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/**Get a square submatrix**/
template <class T>
DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
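// Usage sketch of the helpers above (values illustrative):
//
//   DenseMatrix<ComplexD> A; Resize(A,4,4);
//   Unity(A);                        // A = I
//   PlusUnit(A,ComplexD(0.5,0.0));   // A = 1.5 I
//   DenseMatrix<ComplexD> Ah = HermitianConj(A);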
}
#include <algorithms/iterative/Householder.h>
#include <algorithms/iterative/Francis.h>
#endif

View File

@ -0,0 +1,52 @@
#ifndef GRID_EIGENSORT_H
#define GRID_EIGENSORT_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Eigen sorter to begin with
/////////////////////////////////////////////////////////////
template<class Field>
class SortEigen {
private:
static bool less_lmd(RealD left,RealD right){
return fabs(left) < fabs(right);
}
static bool less_pair(std::pair<RealD,Field>& left,
std::pair<RealD,Field>& right){
return fabs(left.first) < fabs(right.first);
}
public:
void push(DenseVector<RealD>& lmd,
DenseVector<Field>& evec,int N) {
DenseVector<std::pair<RealD, Field> > emod;
typename DenseVector<std::pair<RealD, Field> >::iterator it;
for(int i=0;i<lmd.size();++i){
emod.push_back(std::pair<RealD,Field>(lmd[i],evec[i]));
}
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
it=emod.begin();
for(int i=0;i<N;++i){
lmd[i]=it->first;
evec[i]=it->second;
++it;
}
}
void push(DenseVector<RealD>& lmd,int N) {
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
}
bool saturated(RealD lmd, RealD thrs) {
return fabs(lmd) > fabs(thrs);
}
};
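// Usage sketch (Nstop is illustrative): keep the Nstop smallest |lambda|,
// reordering the vectors to match.
//
//   SortEigen<LatticeFermion> sorter;
//   sorter.push(evals,evecs,Nstop);  // pairs (evals[i],evecs[i]) sorted together
//   sorter.push(evals,Nstop);        // values-only variant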
}
#endif

View File

@ -0,0 +1,498 @@
#ifndef FRANCIS_H
#define FRANCIS_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
//#include <timer.h>
//#include <lapacke.h>
//#include <Eigen/Dense>
namespace Grid {
template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
template <class T> int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
/**
Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
H =
x x x x x x x x x
x x x x x x x x x
0 x x x x x x x x
0 0 x x x x x x x
0 0 0 x x x x x x
0 0 0 0 x x x x x
0 0 0 0 0 x x x x
0 0 0 0 0 0 x x x
0 0 0 0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary.
**/
template <class T>
int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
DenseMatrix<T> H = Hin;
int N ; SizeSquare(H,N);
int M = N;
Fill(evals,(T)0);
Fill(evecs,(T)0);
T s,t,x=0,y=0,z=0;
T u,d;
T apd,amd,bc;
DenseVector<T> p(N,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N); Unity(P);
DenseVector<int> trows(N,0);
/// Check if the matrix is really hessenberg, if not abort
RealD sth = 0;
for(int j=0;j<N;j++){
for(int i=j+2;i<N;i++){
sth = abs(H[i][j]);
if(sth > small){
std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
exit(1);
}
}
}
do{
std::cout << "Francis QR Step N = " << N << std::endl;
/** Check for convergence
x x x x x
0 x x x x
0 0 x x x
0 0 x x x
0 0 0 0 x
for this matrix l = 4
**/
do{
l = Chop_subdiag(H,nrm,e,small);
r = 0; ///May have converged on more than one eval
///Single eval
if(l == N-1){
evals[e] = H[l][l];
N--; e++; r++; it = 0;
}
///Pair of evals from a 2x2 block
if(l == N-2){
trows[l+1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l+1][l+1];
amd = H[l][l] - H[l+1][l+1];
bc = (T)4.0*H[l+1][l]*H[l][l+1];
evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) );
evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
N-=2; e+=2; r++; it = 0;
}
} while(r>0);
if(N ==0) break;
DenseVector<T > ck; Resize(ck,3);
DenseVector<T> v; Resize(v,3);
for(int m = N-3; m >= l; m--){
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0){
s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
///Starting vector implicit Q theorem
else{
s = (H[N-2][N-2] + H[N-1][N-1]);
t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
ck[0] = x; ck[1] = y; ck[2] = z;
if(m == l) break;
/** Some stupid thing from numerical recipes, seems to work**/
// PAB.. for heaven's sake quote page, purpose, evidence it works.
// what sort of comment is that!?!?!?
u=abs(H[m][m-1])*(abs(y)+abs(z));
d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
if ((T)abs(u+d) == (T)abs(d) ){
l = m; break;
}
//if (u < small){l = m; break;}
}
if(it > 100000){
std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, 2, v, beta);
Householder_mult<T >(H,v,beta,0,l,l+2,0);
Householder_mult<T >(H,v,beta,0,l,l+2,1);
///Accumulate eigenvector
Householder_mult<T >(P,v,beta,0,l,l+2,1);
int sw = 0; ///Are we on the last row?
for(int k=l;k<N-2;k++){
x = H[k+1][k];
y = H[k+2][k];
z = (T)0.0;
if(k+3 <= N-1){
z = H[k+3][k];
} else{
sw = 1;
v[2] = (T)0.0;
}
ck[0] = x; ck[1] = y; ck[2] = z;
normalize(ck);
Householder_vector<T >(ck, 0, 2-sw, v, beta);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
///Accumulate eigenvector
Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp; Resize(tmp,N);
for(int i=0;i<N;i++){
tmp[i] = evals[N-i-1];
}
evals = tmp;
UTeigenvectors(H, trows, evals, evecs);
for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
return tot_it;
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
/**
Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
H =
x x 0 0 0 0
x x x 0 0 0
0 x x x 0 0
0 0 x x x 0
0 0 0 x x x
0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. **/
return my_Wilkinson(Hin, evals, evecs, small, small);
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
{
int N; SizeSquare(Hin,N);
int M = N;
///I don't want to modify the input but matrices must be passed by reference
//Scale a matrix by its "norm"
//RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm);
DenseMatrix<T> H; H = Hin;
RealD Hnorm = abs(Norm(Hin));
H = H * (1.0 / Hnorm);
// TODO use openmp and memset
Fill(evals,(T)0);
Fill(evecs,(T)0);
T s, t, x = 0, y = 0, z = 0;
T u, d;
T apd, amd, bc;
DenseVector<T> p; Resize(p,N); Fill(p,(T)0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N);
Unity(P);
DenseVector<int> trows(N, 0);
/// Check if the matrix is really symm tridiag
RealD sth = 0;
for(int j = 0; j < N; ++j)
{
for(int i = j + 2; i < N; ++i)
{
if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
{
std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
std::cout << "Warning tridiagonalize and call again" << std::endl;
// exit(1); // see what is going on
//return;
}
}
}
do{
do{
//Jasper
//Check if the subdiagonal term is small enough (<small)
//if true then it is converged.
//check start from H.dim - e - 1
//How to deal with more than 2 are converged?
//What if Chop_symm_subdiag returns something in the middle?
//--------------
l = Chop_symm_subdiag(H,nrm, e, small);
r = 0; ///May have converged on more than one eval
//Jasper
//In this case
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x x 0
// 0 0 0 x x 0
// 0 0 0 0 0 x <- l
//--------------
///Single eval
if(l == N - 1)
{
evals[e] = H[l][l];
N--;
e++;
r++;
it = 0;
}
//Jasper
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x 0 0
// 0 0 0 0 x x <- l
// 0 0 0 0 x x
//--------------
///Pair of evals from a 2x2 block
if(l == N - 2)
{
trows[l + 1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l + 1][ l + 1];
amd = H[l][l] - H[l + 1][l + 1];
bc = (T) 4.0 * H[l + 1][l] * H[l][l + 1];
evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
N -= 2;
e += 2;
r++;
it = 0;
}
}while(r > 0);
//Jasper
//Already converged
//--------------
if(N == 0) break;
DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
for(int m = N - 3; m >= l; m--)
{
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0)
{
t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
x = H[m][m] - t;
z = H[m + 1][m];
} else {
///Starting vector implicit Q theorem
d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]
/ (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
x = H[m][m] - t;
z = H[m + 1][m];
}
//Jasper
//why it is here????
//-----------------------
if(m == l)
break;
u = abs(H[m][m - 1]) * (abs(y) + abs(z));
d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
if ((T)abs(u + d) == (T)abs(d))
{
l = m;
break;
}
}
//Jasper
if(it > 1000000)
{
std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
//
T s, c;
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, l, l + 1, c, -s, 0);
Givens_mult<T>(H, l, l + 1, c, s, 1);
Givens_mult<T>(P, l, l + 1, c, s, 1);
//
for(int k = l; k < N - 2; ++k)
{
x = H[k + 1][k];
z = H[k + 2][k];
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
Givens_mult<T>(H, k + 1, k + 2, c, s, 1);
Givens_mult<T>(P, k + 1, k + 2, c, s, 1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp(N);
for(int i = 0; i < N; ++i)
tmp[i] = evals[N-i-1];
evals = tmp;
//
UTeigenvectors(H, trows, evals, evecs);
//UTSymmEigenvectors(H, trows, evals, evecs);
for(int i = 0; i < evals.size(); ++i)
{
evecs[i] = P * evecs[i];
normalize(evecs[i]);
evals[i] = evals[i] * Hnorm;
}
// // FIXME this is to test
// Hin.write("evecs3", evecs);
// Hin.write("evals3", evals);
// // check rsd
// for(int i = 0; i < M; i++) {
// vector<T> Aevec = Hin * evecs[i];
// RealD norm2(0.);
// for(int j = 0; j < M; j++) {
// norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
// }
// }
return tot_it;
}
template <class T>
void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
/**
turn a matrix A =
x x x x x
x x x x x
x x x x x
x x x x x
x x x x x
into
x x x x x
x x x x x
0 x x x x
0 0 x x x
0 0 0 x x
with householder rotations
Slow.
*/
int N ; SizeSquare(A,N);
DenseVector<T > p; Resize(p,N); Fill(p,(T)0);
for(int k=start;k<N-2;k++){
//cerr << "hess" << k << std::endl;
DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
for(int i=k+1;i<N;i++){ck[i-k-1] = A[i][k];} ///kth column
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, ck.size()-1, v, beta); ///Householder vector
Householder_mult<T>(A,v,beta,start,k+1,N-1,0); ///A -> PA
Householder_mult<T >(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H
///Accumulate eigenvector
Householder_mult<T >(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H
}
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,k,l);
}
}*/
}
template <class T>
void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
///Tridiagonalize a matrix
int N; SizeSquare(A,N);
Hess(A,Q,start);
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,l,k);
}
}*/
}
template <class T>
void ForceTridiagonal(DenseMatrix<T> &A){
///Tridiagonalize a matrix
int N ; SizeSquare(A,N);
for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A[l][k]=0;
A[k][l]=0;
}
}
}
template <class T>
int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a symmetric eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Tri(A,Q,0);
int it = my_Wilkinson<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
template <class T>
int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_Wilkinson(Ain, evals, evecs, small);
}
template <class T>
int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_SymmEigensystem(Ain, evals, evecs, small);
}
template <class T>
int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a general eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Hess(A,Q,0);
int it = QReigensystem<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
}
#endif

View File

@ -0,0 +1,215 @@
#ifndef HOUSEHOLDER_H
#define HOUSEHOLDER_H
#define TIMER(A) std::cout << GridLogMessage << __func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
namespace Grid {
/** Comparison function for finding the max element in a vector **/
template <class T> bool cf(T i, T j) {
return abs(i) < abs(j);
}
/**
Calculate a real Givens angle
**/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
RealD mz = (RealD)abs(z);
if(mz==0.0){
c = 1; s = 0;
return;
}
if(mz >= (RealD)abs(y)){
T t = -y/z;
s = (T)1.0 / sqrt ((T)1.0 + t * t);
c = s * t;
} else {
T t = -z/y;
c = (T)1.0 / sqrt ((T)1.0 + t * t);
s = c * t;
}
}
template <class T> inline void Givens_mult(DenseMatrix<T> &A, int i, int k, T c, T s, int dir)
{
int q ; SizeSquare(A,q);
if(dir == 0){
for(int j=0;j<q;j++){
T nu = A[i][j];
T w = A[k][j];
A[i][j] = (c*nu + s*w);
A[k][j] = (-s*nu + c*w);
}
}
if(dir == 1){
for(int j=0;j<q;j++){
T nu = A[j][i];
T w = A[j][k];
A[j][i] = (c*nu - s*w);
A[j][k] = (s*nu + c*w);
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
P | x | | x | k = 0
| x | | 0 |
| x | = | 0 |
| x | | 0 | j = 3
| x | | x |
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
{
int N ; Size(input,N);
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
if(abs(v[k]) > 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
else v[k] = -alpha;
} else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
Px = alpha*e_dir
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
{
int N = input.size();
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T>);
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = 1.0/(alpha*(alpha + abs(v[dir]) ));
if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
else v[dir] = -alpha;
}else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
**/
template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
{
int N ; SizeSquare(A,N);
if(abs(beta) > 0.0){
for(int p=l; p<N; p++){
T s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p];
s *= beta;
for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
} else {
for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];}
s *= beta;
for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
}
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
A is tridiagonal
**/
template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
{
if(abs(beta) > 0.0){
int N ; SizeSquare(A,N);
DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,(T)0);
T s;
for(int p=l; p<M; p++){
s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
}else{
for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
}
s = beta*s;
if(trans==0){
for(int i=k;i<j+1;i++) tmp[i][p] = tmp[i][p] - s*v[i-k];
}else{
for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
}
}
for(int p=l; p<M; p++){
if(trans==0){
for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
}else{
for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
}
}
}
}
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,426 @@
#ifndef MATRIX_H
#define MATRIX_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <vector>
#include <iostream>
#include <iomanip>
#include <complex>
#include <typeinfo>
#include <Grid.h>
/** Sign function **/
template <class T> T sign(T p){return ( p/abs(p) );}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class T> using Vector = std::vector<T>;
template<class T> using Matrix = Vector<Vector<T> >;
template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); }
template<class T> void Resize(Matrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Size(Vector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(Matrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(Matrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)
{
int N2,M2;
Size(mat1,N1,M1);
Size(mat2,N2,M2);
assert(N1==N2);
assert(M1==M2);
}
//*****************************************
//* (Complex) Vector operations *
//*****************************************
/**Conj of a Vector **/
template <class T> Vector<T> conj(Vector<T> p){
Vector<T> q(p.size());
for(int i=0;i<p.size();i++){q[i] = conj(p[i]);}
return q;
}
/** Norm of a Vector**/
template <class T> T norm(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs(sqrt(sum));
}
/** Norm squared of a Vector **/
template <class T> T norm2(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs((sum));
}
/** Sum elements of a Vector **/
template <class T> T trace(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i];}
return sum;
}
/** Fill a Vector with constant c **/
template <class T> void Fill(Vector<T> &p, T c){
for(int i=0;i<p.size();i++){p[i] = c;}
}
/** Normalize a Vector **/
template <class T> void normalize(Vector<T> &p){
T m = norm(p);
if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;}
}
/** Vector by scalar **/
template <class T, class U> Vector<T> times(Vector<T> p, U s){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
template <class T, class U> Vector<T> times(U s, Vector<T> p){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
/** inner product of a and b = conj(a) . b **/
template <class T> T inner(Vector<T> a, Vector<T> b){
T m = 0.;
for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];}
return m;
}
/** sum of a and b = a + b **/
template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];}
return m;
}
/** sum of a and b = a - b **/
template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];}
return m;
}
/**
*********************************
* Matrices *
*********************************
**/
template<class T> void Fill(Matrix<T> & mat, const T &val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
template<class T> Matrix<T> Transpose(Matrix<T> & mat){
int N,M;
Size(mat,N,M);
Matrix<T> C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set Matrix to unit matrix **/
template<class T> void Unity(Matrix<T> &mat){
int N; SizeSquare(mat,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) mat[i][j] = 1;
else        mat[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(Matrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
template<class T> Matrix<T> HermitianConj(Matrix<T> &mat){
int dim; SizeSquare(mat,dim);
Matrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/** return diagonal entries as a Vector **/
template<class T> Vector<T> diag(Matrix<T> &A)
{
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = A[i][i];
}
return d;
}
/** Left multiply by a Vector **/
template<class T> Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
{
int K,M,N;
Size(B,K);
Size(A,M,N);
assert(K==M);
Vector<T> C; Resize(C,N);
for(int j=0;j<N;j++){
T sum = 0.0;
for(int i=0;i<M;i++){
sum += B[i] * A[i][j];
}
C[j] = sum;
}
return C;
}
/** return 1/diagonal entries as a Vector **/
template<class T> Vector<T> inv_diag(Matrix<T> & A){
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = 1.0/A[i][i];
}
return d;
}
/** Matrix Addition **/
template<class T> inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
{
int N,M ; SizeSame(A,B,N,M);
Matrix<T> C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] + B[i][j];
}
}
return C;
}
/** Matrix Subtraction **/
template<class T> inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
int N,M ; SizeSame(A,B,N,M);
Matrix<T> C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] - B[i][j];
}}
return C;
}
/** Matrix scalar multiplication **/
template<class T> inline Matrix<T> operator* (Matrix<T> & A,T c){
int N,M; Size(A,N,M);
Matrix<T> C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j]*c;
}}
return C;
}
/** Matrix Matrix multiplication **/
template<class T> inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
int K,L,N,M;
Size(A,K,L);
Size(B,N,M); assert(L==N);
Matrix<T> C; Resize(C,K,M);
for(int i=0;i<K;i++){
for(int j=0;j<M;j++){
T sum = 0.0;
for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
C[i][j] =sum;
}
}
return C;
}
/** Matrix Vector multiplication **/
template<class T> inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
int M,N,K;
Size(A,N,M);
Size(B,K); assert(K==M);
Vector<T> C; Resize(C,N);
for(int i=0;i<N;i++){
T sum = 0.0;
for(int j=0;j<M;j++) sum += A[i][j]*B[j];
C[i] = sum;
}
return C;
}
/** Some version of Matrix norm **/
/*
inline T Norm(){ // this is not a usual L2 norm
T norm = 0;
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
norm += abs(A[i][j]);
}}
return norm;
}
*/
/** Some version of Matrix norm **/
template<class T> T LargestDiag(Matrix<T> &A)
{
int dim ; SizeSquare(A,dim);
T ld = abs(A[0][0]);
for(int i=1;i<dim;i++){
T cf = abs(A[i][i]);
if(abs(cf) > abs(ld) ){ld = cf;}
}
return ld;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l-1]=(U)0.0;
return l;
}
}
return 0;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l - 1] = (U)0.0;
A[l - 1][l] = (U)0.0;
return l;
}
}
return 0;
}
/**Assign a submatrix to a larger one; NB a Vector of Vectors is the transpose of the matrix it represents**/
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
A[i][j] = S[i - row_st][j - col_st];
}
}
}
/**Get a square submatrix**/
template <class T>
Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
Matrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
/** compute b_i A_ij b_j **/ // surprised no Conj
template<class T> T proj(Matrix<T> A, Vector<T> B){
int dim; SizeSquare(A,dim);
int dimB; Size(B,dimB);
assert(dimB==dim);
T C = 0;
for(int i=0;i<dim;i++){
T sum = 0.0;
for(int j=0;j<dim;j++){
sum += A[i][j]*B[j];
}
C += B[i]*sum; // No conj?
}
return C;
}
/*
*************************************************************
*
* Matrix Vector products
*
*************************************************************
*/
// Instead make a linop and call my CG;
/// q -> q Q
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
{
int M; SizeSquare(Q,M);
int N; Size(q,N);
assert(M==N);
times(q,Q,N);
}
/// q -> q Q
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q, int N)
{
GridBase *grid = q[0]._grid;
int M; SizeSquare(Q,M);
int K; Size(q,K);
assert(N<M);
assert(N<K);
Vector<Fermion> S(N,grid );
for(int j=0;j<N;j++){
S[j] = zero;
for(int k=0;k<N;k++){
S[j] = S[j] + q[k]* Q[k][j];
}
}
for(int j=0;j<q.size();j++){
q[j] = S[j];
}
}
#endif

View File

@ -0,0 +1,48 @@
#ifndef GRID_MATRIX_UTILS_H
#define GRID_MATRIX_UTILS_H
namespace Grid {
namespace MatrixUtils {
template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
N=A.size(); assert(N>0);
M=A[0].size();
for(int i=0;i<N;i++){
assert(A[i].size()==M);
}
}
template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
{
int M;
Size(A,N,M);
assert(N==M);
}
template<class T> inline void Fill(Matrix<T>& A,const T & val)
{
int N,M;
Size(A,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
A[i][j]=val;
}}
}
template<class T> inline void Diagonal(Matrix<T>& A,const T & val)
{
int N;
SizeSquare(A,N);
for(int i=0;i<N;i++){
A[i][i]=val;
}
}
template<class T> inline void Identity(Matrix<T>& A)
{
Fill(A,0.0);
Diagonal(A,1.0);
}
};
}
#endif

View File

@ -0,0 +1,92 @@
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
#define GRID_PREC_CONJUGATE_RESIDUAL_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Base classes for iterative processes based on operators
// single input vec, single output vec.
/////////////////////////////////////////////////////////////
template<class Field>
class PrecConjugateResidual : public OperatorFunction<Field> {
public:
RealD Tolerance;
Integer MaxIterations;
int verbose;
LinearFunction<Field> &Preconditioner;
PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec) : Tolerance(tol), MaxIterations(maxit), Preconditioner(Prec)
{
verbose=1;
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
RealD a, b, c, d;
RealD cp, ssq,rsq;
RealD rAr, rAAr, rArp;
RealD pAp, pAAp;
GridBase *grid = src._grid;
Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);
psi=zero;
r = src;
Preconditioner(r,p);
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
Ar=Ap;
rAr=pAp;
rAAr=pAAp;
cp =norm2(r);
ssq=norm2(src);
rsq=Tolerance*Tolerance*ssq;
if (verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<0<<" residual "<<cp<< " target "<< rsq<<std::endl;
for(int k=0;k<MaxIterations;k++){
Preconditioner(Ap,z);
RealD rq= real(innerProduct(Ap,z));
a = rAr/rq;
axpy(psi,a,p,psi);
cp = axpy_norm(r,-a,z,r);
rArp=rAr;
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
b =rAr/rArp;
axpy(p,b,p,r);
pAAp=axpy_norm(Ap,b,Ap,Ar);
if(verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
if(cp<rsq) {
Linop.HermOp(psi,Ap);
axpy(r,-1.0,src,Ap);
RealD true_resid = norm2(r)/ssq;
std::cout<<GridLogMessage<<"PrecConjugateResidual: Converged on iteration " <<k
<< " computed residual "<<sqrt(cp/ssq)
<< " true residual "<<sqrt(true_resid)
<< " target " <<Tolerance <<std::endl;
return;
}
}
std::cout<<GridLogMessage<<"PrecConjugateResidual did NOT converge"<<std::endl;
assert(0);
}
};
}
#endif
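Typical invocation, sketched with assumed helper types (an identity
preconditioner and a Hermitian linop wrapper stand in for real ones):

    // TrivialPrecon<LatticeFermion> simple;             // assumed identity preconditioner
    // PrecConjugateResidual<LatticeFermion> PCR(1.0e-8,10000,simple);
    // PCR(HermOp,src,psi);                              // HermOp: a LinearOperatorBase wrapper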

View File

@ -0,0 +1,175 @@
#ifndef GRID_PREC_GCR_H
#define GRID_PREC_GCR_H
///////////////////////////////////////////////////////////////////////////////////////////////////////
//VPGCR Abe and Zhang, 2005.
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
//Computing and Information Volume 2, Number 2, Pages 147-161
//NB. Likely not the original reference, since they focus on a preconditioned variant,
// but VPGCR is nicely written up in their paper
///////////////////////////////////////////////////////////////////////////////////////////////////////
namespace Grid {
template<class Field>
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
public:
RealD Tolerance;
Integer MaxIterations;
int verbose;
int mmax;
int nstep;
int steps;
LinearFunction<Field> &Preconditioner;
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
Tolerance(tol),
MaxIterations(maxit),
Preconditioner(Prec),
mmax(_mmax),
nstep(_nstep)
{
verbose=1;
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
psi=zero;
RealD cp, ssq,rsq;
ssq=norm2(src);
rsq=Tolerance*Tolerance*ssq;
Field r(src._grid);
steps=0;
for(int k=0;k<MaxIterations;k++){
cp=GCRnStep(Linop,src,psi,rsq);
if ( verbose ) std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
if(cp<rsq) {
Linop.HermOp(psi,r);
axpy(r,-1.0,src,r);
RealD tr = norm2(r);
std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
<< " computed residual "<<sqrt(cp/ssq)
<< " true residual " <<sqrt(tr/ssq)
<< " target " <<Tolerance <<std::endl;
return;
}
}
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
assert(0);
}
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
RealD cp;
RealD a, b, c, d;
RealD zAz, zAAz;
RealD rAq, rq;
GridBase *grid = src._grid;
Field r(grid);
Field z(grid);
Field tmp(grid);
Field ttmp(grid);
Field Az(grid);
////////////////////////////////
// history for flexible orthog
////////////////////////////////
std::vector<Field> q(mmax,grid);
std::vector<Field> p(mmax,grid);
std::vector<RealD> qq(mmax);
//////////////////////////////////
// initial guess x0 may be nonzero (e.g. after a restart),
// so compute r0 = src - A x0 explicitly
//////////////////////////////////
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
r=src-Az;
/////////////////////
// p = Prec(r)
/////////////////////
Preconditioner(r,z);
std::cout<<GridLogMessage<< " Preconditioner in " << norm2(r)<<std::endl;
std::cout<<GridLogMessage<< " Preconditioner out " << norm2(z)<<std::endl;
Linop.HermOp(z,tmp);
std::cout<<GridLogMessage<< " Preconditioner Aout " << norm2(tmp)<<std::endl;
ttmp=tmp;
tmp=tmp-r;
std::cout<<GridLogMessage<< " Preconditioner resid " << std::sqrt(norm2(tmp)/norm2(r))<<std::endl;
/*
std::cout<<GridLogMessage<<r<<std::endl;
std::cout<<GridLogMessage<<z<<std::endl;
std::cout<<GridLogMessage<<ttmp<<std::endl;
std::cout<<GridLogMessage<<tmp<<std::endl;
*/
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
//p[0],q[0],qq[0]
p[0]= z;
q[0]= Az;
qq[0]= zAAz;
cp =norm2(r);
for(int k=0;k<nstep;k++){
steps++;
int kp = k+1;
int peri_k = k %mmax;
int peri_kp= kp%mmax;
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
a = rq/qq[peri_k];
axpy(psi,a,p[peri_k],psi);
cp = axpy_norm(r,-a,q[peri_k],r);
std::cout<<GridLogMessage<< " VPGCR_step resid" <<sqrt(cp/rsq)<<std::endl;
if((k==nstep-1)||(cp<rsq)){
return cp;
}
Preconditioner(r,z);// solve Az = r
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
Linop.HermOp(z,tmp);
tmp=tmp-r;
std::cout<<GridLogMessage<< " Preconditioner resid" <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
q[peri_kp]=Az;
p[peri_kp]=z;
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
for(int back=0;back<northog;back++){
int peri_back=(k-back)%mmax; assert((k-back)>=0);
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
p[peri_kp]=p[peri_kp]+b*p[peri_back];
q[peri_kp]=q[peri_kp]+b*q[peri_back];
}
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
}
assert(0); // never reached
return cp;
}
};
}
#endif
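The solver restarts every nstep iterations and keeps at most mmax
search directions for the flexible orthogonalisation; because the
preconditioner may change from iteration to iteration (the "variable"
in VPGCR), the q,p history must be re-orthogonalised at each step. A
wiring sketch with assumed helper types:

    // TrivialPrecon<LatticeFermion> simple;
    // PrecGeneralisedConjugateResidual<LatticeFermion> VPGCR(1.0e-8,10000,simple,20,20);
    // VPGCR(HermOp,src,psi);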

View File

@ -89,7 +89,7 @@ namespace Grid {
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
///////////////////////////////////////////////////
@ -108,7 +108,7 @@ namespace Grid {
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};

View File

@ -0,0 +1,122 @@
#include <math.h>
#include <stdlib.h>
#include <utility>
#include <vector>
struct Bisection {
static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
{
int i,j;
std::vector<RealD> evec1(row_num+3);
std::vector<RealD> evec2(row_num+3);
RealD eps2;
ALPHA[1]=0.;
BETA[1]=0.;
for(i=0;i<row_num-1;i++) {
ALPHA[i+1] = A[i*(row_num+1)].real();   // A: flattened (row_num x row_num) tridiagonal matrix, assumed in enclosing scope
BETA[i+2]  = A[i*(row_num+1)+1].real();
}
ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
bisec(ALPHA,BETA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
bisec(ALPHA,BETA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
// Do we really need to sort here?
int begin=1;
int end = row_num;
int swapped=1;
while(swapped) {
swapped=0;
for(i=begin;i<end;i++){
if(fabs(evec2[i])>fabs(evec2[i+1])) {
std::swap(evec2[i],evec2[i+1]);
swapped=1;
}
}
end--;
for(i=end-1;i>=begin;i--){
if(fabs(evec2[i])>fabs(evec2[i+1])) {
std::swap(evec2[i],evec2[i+1]);
swapped=1;
}
}
begin++;
}
// H: flattened (row_num x row_num) output matrix, assumed in enclosing
// scope; the sorted eigenvalue estimates are placed on its diagonal.
for(i=0;i<row_num;i++){
for(j=0;j<row_num;j++) {
if(i==j) H[i*row_num+j]=evec2[i+1];
else H[i*row_num+j]=0.;
}
}
}
static void bisec(std::vector<RealD> &c,
std::vector<RealD> &b,
int n,
int m1,
int m2,
RealD eps1,
RealD relfeh,
std::vector<RealD> &x,
RealD &eps2)
{
std::vector<RealD> wu(n+2);
RealD h,q,x1,xu,x0,xmin,xmax;
int i,a,k;
b[1]=0.0;
xmin=c[n]-fabs(b[n]);
xmax=c[n]+fabs(b[n]);
for(i=1;i<n;i++){
h=fabs(b[i])+fabs(b[i+1]);
if(c[i]+h>xmax) xmax= c[i]+h;
if(c[i]-h<xmin) xmin= c[i]-h;
}
xmax *=2.;
eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
if(eps1<=0.0) eps1=eps2;
eps2=0.5*eps1+7.0*(eps2);
x0=xmax;
for(i=m1;i<=m2;i++){
x[i]=xmax;
wu[i]=xmin;
}
for(k=m2;k>=m1;k--){
xu=xmin;
i=k;
do{
if(xu<wu[i]){
xu=wu[i];
i=m1-1;
}
i--;
}while(i>=m1);
if(x0>x[k]) x0=x[k];
while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
x1=(xu+x0)/2;
a=0;
q=1.0;
for(i=1;i<=n;i++){
q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
if(q<0) a++;
}
// printf("x1=%e a=%d\n",x1,a);
if(a<k){
if(a<m1){
xu=x1;
wu[m1]=x1;
}else {
xu=x1;
wu[a+1]=x1;
if(x[a]>x1) x[a]=x1;
}
}else x0=x1;
}
x[k]=(x0+xu)/2;
}
}
};
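The inner loop of bisec() is a Sturm sequence count: for a trial value
x1 it counts the eigenvalues below x1 from the signs of the recursively
computed leading principal minors. A worked miniature for the 2x2
tridiagonal [[2,1],[1,2]] (eigenvalues 1 and 3), with c and b 1-indexed
as in the code:

    // c = {_,2,2}, b = {_,0,1}, x1 = 2.0, q starts at 1.0:
    //   i=1: q = 2 - 2 - 0*0/1          = 0    (not negative, a stays 0)
    //   i=2: q = 2 - 2 - fabs(1)/relfeh < 0    (q==0 branch, a becomes 1)
    // One eigenvalue (namely 1) lies below x1 = 2, as expected.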

View File

@ -0,0 +1 @@

View File

@ -87,6 +87,14 @@ class CartesianCommunicator {
void *recv,
int recv_from_rank,
int bytes);
void RecvFrom(void *recv,
int recv_from_rank,
int bytes);
void SendTo(void *xmit,
int xmit_to_rank,
int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,

View File

@ -81,13 +81,30 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
}
void CartesianCommunicator::RecvFrom(void *recv,
int from,
int bytes)
{
MPI_Status stat;
int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
assert(ierr==0);
}
void CartesianCommunicator::SendTo(void *xmit,
int dest,
int bytes)
{
// tag with _processor so the receiver knows whom it came from
int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
assert(ierr==0);
}
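// Pairing sketch: SendTo tags with the sender's rank and RecvFrom
// matches on the peer's rank, so a matched pair looks like
//   if ( me==0 ) grid->SendTo(buf,1,bytes);     // tag 0 (the sender)
//   if ( me==1 ) grid->RecvFrom(buf,0,bytes);   // expects tag 0 from rank 0
// which keeps concurrent point-to-point pairs from cross-matching.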
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
@ -100,7 +117,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{

View File

@ -22,6 +22,20 @@ void CartesianCommunicator::GlobalSum(double &){}
void CartesianCommunicator::GlobalSum(uint32_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}
void CartesianCommunicator::RecvFrom(void *recv,
int recv_from_rank,
int bytes)
{
assert(0);
}
void CartesianCommunicator::SendTo(void *xmit,
int xmit_to_rank,
int bytes)
{
assert(0);
}
// Basic Halo comms primitive -- should never call in single node
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,

View File

@ -8,7 +8,7 @@ class SimpleCompressor {
public:
void Point(int) {};
vobj operator() (const vobj &arg) {
vobj operator() (const vobj &arg,int dimension,int plane,int osite,GridBase *grid) {
return arg;
}
};
@ -24,21 +24,32 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
if ( !rhs._grid->CheckerBoarded(dimension) ) {
cbmask = 0x3;
}
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
if ( cbmask == 0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int bo = n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb &cbmask ) {
buffer[bo+b]=compress(rhs._odata[so+o+b]);
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int bo = n*rhs._grid->_slice_block[dimension];
buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
}
}
} else {
int bo=0;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb &cbmask ) {
buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
}
}
}
}
}
@ -59,18 +70,33 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
if ( cbmask ==0x3){
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o=n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
int o=n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb & cbmask ) {
cobj temp;
temp =compress(rhs._odata[so+o+b]);
cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
extract<cobj>(temp,pointers,offset);
}
}
} else {
assert(0); //Fixme think this is buggy
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o=n*rhs._grid->_slice_stride[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
int offset = b+n*rhs._grid->_slice_block[dimension];
if ( ocb & cbmask ) {
cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
extract<cobj>(temp,pointers,offset);
}
}
}
}
@ -109,16 +135,28 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
if ( cbmask ==0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
rhs._odata[so+o+b]=buffer[bo+b];
}
}
} else {
int bo=0;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
rhs._odata[so+o+b]=buffer[bo++];
}
}
}
}
}
@ -137,16 +175,28 @@ PARALLEL_NESTED_LOOP2
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
if(cbmask ==0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
merge(rhs._odata[so+o+b],pointers,offset);
}
}
} else {
assert(0); // think this is buggy FIXME
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
merge(rhs._odata[so+o+b],pointers,offset);
}
}
}
}
}
@ -166,17 +216,29 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=rhs._grid->_slice_block[dimension];
if(cbmask == 0x3 ){
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension]+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
//lhs._odata[lo+o]=rhs._odata[ro+o];
int o =n*rhs._grid->_slice_stride[dimension]+b;
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
}
}
} else {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension]+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
}
}
}
}

View File

@ -9,7 +9,7 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
Lattice<vobj> ret(rhs._grid);
Lattice<vobj> ret(rhs._grid);
int fd = rhs._grid->_fdimensions[dimension];
int rd = rhs._grid->_rdimensions[dimension];
@ -26,10 +26,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
if ( !comm_dim ) {
// std::cout << "Cshift_local" <<std::endl;
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
} else if ( splice_dim ) {
// std::cout << "Cshift_comms_simd" <<std::endl;
Cshift_comms_simd(ret,rhs,dimension,shift);
} else {
// std::cout << "Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift);
}
return ret;
@ -42,9 +45,13 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
// std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x3);
} else {
// std::cout << "Two pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
}
@ -113,12 +120,16 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
int xmit_to_rank;
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
grid->SendToRecvFrom((void *)&send_buf[0],
xmit_to_rank,
(void *)&recv_buf[0],
recv_from_rank,
bytes);
// for(int i=0;i<words;i++){
// std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
// }
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
}
}

View File

@ -132,18 +132,18 @@ inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
assert(cb==lat.checkerboard);
}
cb=lat.checkerboard;
// std::cout<<"Lattice leaf cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
}
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
{
// std::cout<<"Non lattice leaf cb"<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
}
template <typename Op, typename T1>
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
{
CBFromExpression(cb,std::get<0>(expr.second));// recurse
// std::cout<<"Unary node cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
}
template <typename Op, typename T1, typename T2>
@ -151,7 +151,7 @@ inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &ex
{
CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb,std::get<1>(expr.second));
// std::cout<<"Binary node cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
}
template <typename Op, typename T1, typename T2, typename T3>
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
@ -159,7 +159,7 @@ inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3
CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb,std::get<1>(expr.second));
CBFromExpression(cb,std::get<2>(expr.second));
// std::cout<<"Trinary node cb "<<cb<<std::endl;
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
}
////////////////////////////////////////////
@ -178,6 +178,7 @@ GridUnopClass(UnaryConj,conjugate(a));
GridUnopClass(UnaryTrace,trace(a));
GridUnopClass(UnaryTranspose,transpose(a));
GridUnopClass(UnaryTa,Ta(a));
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
GridUnopClass(UnaryReal,real(a));
GridUnopClass(UnaryImag,imag(a));
GridUnopClass(UnaryToReal,toReal(a));
@ -290,13 +291,14 @@ GRID_DEF_UNOP(conjugate,UnaryConj);
GRID_DEF_UNOP(trace,UnaryTrace);
GRID_DEF_UNOP(transpose,UnaryTranspose);
GRID_DEF_UNOP(Ta,UnaryTa);
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
GRID_DEF_UNOP(real,UnaryReal);
GRID_DEF_UNOP(imag,UnaryImag);
GRID_DEF_UNOP(toReal,UnaryToReal);
GRID_DEF_UNOP(toComplex,UnaryToComplex);
GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
GRID_DEF_UNOP(sqrt ,UnarySqrt);
GRID_DEF_UNOP(rsqrt,UnarySqrt);
GRID_DEF_UNOP(rsqrt,UnaryRsqrt);
GRID_DEF_UNOP(sin ,UnarySin);
GRID_DEF_UNOP(cos ,UnaryCos);
GRID_DEF_UNOP(log ,UnaryLog);
@ -370,7 +372,7 @@ using namespace Grid;
tmp.func(eval(0,v1),eval(0,v2));
auto var = v1+v2;
std::cout<<typeid(var).name()<<std::endl;
std::cout<<GridLogMessage<<typeid(var).name()<<std::endl;
v3=v1+v2;
v3=v1+v2+v1*v2;

View File

@ -29,6 +29,9 @@ extern int GridCshiftPermuteMap[4][16];
class LatticeBase {};
class LatticeExpressionBase {};
template<class T> using Vector = std::vector<T,alignedAllocator<T> >; // Aligned allocator??
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
template <typename Op, typename T1>
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
public:
@ -59,7 +62,12 @@ public:
GridBase *_grid;
int checkerboard;
std::vector<vobj,alignedAllocator<vobj> > _odata;
Vector<vobj> _odata;
// for pthreads we need a computable loop range where loop induction is not required
int begin(void) { return 0;};
int end(void) { return _odata.size(); }
vobj & operator[](int i) { return _odata[i]; };
public:
typedef typename vobj::scalar_type scalar_type;
@ -204,9 +212,10 @@ PARALLEL_FOR_LOOP
// Constructor requires "grid" passed.
// what about a default grid?
//////////////////////////////////////////////////////////////////
Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
// _odata.reserve(_grid->oSites());
// _odata.resize(_grid->oSites());
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
assert((((uint64_t)&_odata[0])&0xF) ==0);
checkerboard=0;
}
@ -221,7 +230,7 @@ PARALLEL_FOR_LOOP
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
std::cout<<"Lattice operator ="<<std::endl;
std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];

View File

@ -10,20 +10,11 @@ namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////////////
// Peek internal indices of a Lattice object
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
auto peekIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
}
return ret;
};
template<int Index,class vobj>
auto peekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
@ -31,9 +22,10 @@ PARALLEL_FOR_LOOP
return ret;
};
template<int Index,class vobj>
auto peekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
@ -44,16 +36,8 @@ PARALLEL_FOR_LOOP
////////////////////////////////////////////////////////////////////////////////////////////////////
// Poke internal indices of a Lattice object
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> & rhs)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss]);
}
}
template<int Index,class vobj>
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
@ -61,7 +45,7 @@ PARALLEL_FOR_LOOP
}
}
template<int Index,class vobj>
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){

View File

@ -125,7 +125,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
assert(grid!=NULL);
// FIXME
std::cout<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();

View File

@ -5,6 +5,37 @@
namespace Grid {
//////////////////////////////////////////////////////////////
// Allow the RNG state to be less dense than the fine grid
//////////////////////////////////////////////////////////////
inline int RNGfillable(GridBase *coarse,GridBase *fine)
{
int rngdims = coarse->_ndimension;
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
int lowerdims = fine->_ndimension - coarse->_ndimension;
assert(lowerdims >= 0);
for(int d=0;d<lowerdims;d++){
assert(fine->_simd_layout[d]==1);
assert(fine->_processors[d]==1);
}
// local and global volumes subdivide cleanly after SIMDization
int multiplicity=1;
for(int d=0;d<rngdims;d++){
int fd= d+lowerdims;
assert(coarse->_processors[d] == fine->_processors[fd]);
assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
assert((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[fd]);
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
}
return multiplicity;
}
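// Example (equal dimensionality, assumed shapes): an 8^4 fine grid
// filled from a 4^4 RNG grid gives multiplicity = 2^4 = 16; each RNG
// site's generators are then drawn from 16 times per fill, once for
// each fine site they own.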
// Wrap seed_seq to give common interface with random_device
class fixedSeed {
public:
@ -226,26 +257,32 @@ namespace Grid {
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
conformable(_grid,l._grid);
int multiplicity = RNGfillable(_grid,l._grid);
int Nsimd =_grid->Nsimd();
int osites=_grid->oSites();
int words=sizeof(scalar_object)/sizeof(scalar_type);
std::vector<scalar_object> buf(Nsimd);
for(int ss=0;ss<osites;ss++){
for(int si=0;si<Nsimd;si++){
int gdx = generator_idx(ss,si); // index of generator state
scalar_type *pointer = (scalar_type *)&buf[si];
for(int idx=0;idx<words;idx++){
fillScalar(pointer[idx],dist,_generators[gdx]);
PARALLEL_FOR_LOOP
for(int ss=0;ss<osites;ss++){
std::vector<scalar_object> buf(Nsimd);
for(int m=0;m<multiplicity;m++) {// Draw from same generator multiplicity times
int sm=multiplicity*ss+m; // Maps the generator site to the fine site
for(int si=0;si<Nsimd;si++){
int gdx = generator_idx(ss,si); // index of generator state
scalar_type *pointer = (scalar_type *)&buf[si];
for(int idx=0;idx<words;idx++){
fillScalar(pointer[idx],dist,_generators[gdx]);
}
}
// merge into SIMD lanes
merge(l._odata[sm],buf);
}
// merge into SIMD lanes
merge(l._odata[ss],buf);
}
};

View File

@ -26,7 +26,7 @@ PARALLEL_FOR_LOOP
// Trace Index level dependent operation
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto traceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP

View File

@ -17,13 +17,14 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
}
}
////////////////////////////////////////////////////////////////////////////////////////////
// remove and insert a half checkerboard
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
half.checkerboard = cb;
int ssh=0;
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
@ -40,7 +41,7 @@ PARALLEL_FOR_LOOP
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
int cb = half.checkerboard;
int ssh=0;
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
@ -158,6 +159,7 @@ template<class vobj,class CComplex>
fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner);
PARALLEL_FOR_LOOP
for(int ss=0;ss<coarse->oSites();ss++){
CoarseInner._odata[ss] = coarse_inner._odata[ss];
}
@ -168,7 +170,7 @@ inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
GridBase *coarse = ip._grid;
Lattice<vobj> zz(fineX._grid); zz=zero;
blockInnerProduct(ip,fineX,fineX);
ip = rsqrt(ip);
ip = pow(ip,-0.5);
blockZAXPY(fineX,ip,fineX,zz);
}
// useful in multigrid project;
@ -297,5 +299,42 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
}
template<class vobj>
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{
typedef typename vobj::scalar_object sobj;
GridBase *cg = coarse._grid;
GridBase *fg = fine._grid;
int nd = cg->_ndimension;
subdivides(cg,fg);
assert(cg->_ndimension==fg->_ndimension);
std::vector<int> ratio(cg->_ndimension);
for(int d=0;d<cg->_ndimension;d++){
ratio[d] = fg->_fdimensions[d]/cg->_fdimensions[d];
}
std::vector<int> fcoor(nd);
std::vector<int> ccoor(nd);
for(int g=0;g<fg->gSites();g++){
fg->GlobalIndexToGlobalCoor(g,fcoor);
for(int d=0;d<nd;d++){
ccoor[d] = fcoor[d]%cg->_gdimensions[d];
}
sobj tmp;
peekSite(tmp,coarse,ccoor);
pokeSite(tmp,fine,fcoor);
}
}
}
#endif
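Replicate tiles the source field periodically across the larger
volume; note the argument order is (source,destination). A sketch with
assumed grid construction:

    // LatticeComplex small(CoarseGrid);   // e.g. global 4^4
    // LatticeComplex tiled(FineGrid);     // e.g. global 8^4, an exact multiple
    // Replicate(small,tiled);             // tiled(x) = small(x mod 4)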

View File

@ -24,7 +24,7 @@ PARALLEL_FOR_LOOP
// Index level dependent transpose
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto transposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP

View File

@ -24,6 +24,17 @@ PARALLEL_FOR_LOOP
return ret;
}
template<class obj> Lattice<obj> div(const Lattice<obj> &rhs,Integer y){
Lattice<obj> ret(rhs._grid);
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
ret._odata[ss]=div(rhs._odata[ss],y);
}
return ret;
}
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){
Lattice<obj> ret(rhs._grid);
ret.checkerboard = rhs.checkerboard;

View File

@ -22,7 +22,6 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
typedef typename iobj::vector_type mask_type;
const int Nsimd = grid->Nsimd();
const int words = sizeof(vobj)/sizeof(vector_type);
std::vector<Integer> mask(Nsimd);
std::vector<scalar_object> truevals (Nsimd);

512
lib/parallelIO/BinaryIO.h Normal file
View File

@ -0,0 +1,512 @@
#ifndef GRID_BINARY_IO_H
#define GRID_BINARY_IO_H
#ifdef HAVE_ENDIAN_H
#include <endian.h>
#endif
#include <arpa/inet.h>
#include <algorithm>
// 64bit endian swap is a portability pain
#ifndef __has_builtin // Optional of course.
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
#endif
#if HAVE_DECL_BE64TOH
#undef Grid_ntohll
#define Grid_ntohll be64toh
#endif
#if HAVE_DECL_NTOHLL
#undef Grid_ntohll
#define Grid_ntohll ntohll
#endif
#ifndef Grid_ntohll
#if BYTE_ORDER == BIG_ENDIAN
#define Grid_ntohll(A) (A)
#else
#if __has_builtin(__builtin_bswap64)
#define Grid_ntohll(A) __builtin_bswap64(A)
#else
#error "no 64-bit byte swap available"
#endif
#endif
#endif
namespace Grid {
// A little helper
inline void removeWhitespace(std::string &key)
{
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
}
class BinaryIO {
public:
// Network is big endian
static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}
static inline void be32toh_v(void *file_object,uint32_t bytes)
{
uint32_t * f = (uint32_t *)file_object;
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
f[i] = ntohl(f[i]);
}
}
// LE must Swap and switch to host
static inline void le32toh_v(void *file_object,uint32_t bytes)
{
uint32_t *fp = (uint32_t *)file_object;
uint32_t f;
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
f = fp[i];
// force network (big-endian) order, then convert network to host
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
fp[i] = ntohl(f);
}
}
// BE is same as network
static inline void be64toh_v(void *file_object,uint32_t bytes)
{
uint64_t * f = (uint64_t *)file_object;
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
f[i] = Grid_ntohll(f[i]);
}
}
// LE must swap and switch;
static inline void le64toh_v(void *file_object,uint32_t bytes)
{
uint64_t *fp = (uint64_t *)file_object;
uint64_t f,g;
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
f = fp[i];
// got network order and the network to host
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
g = g << 32;
f = f >> 32;
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
fp[i] = Grid_ntohll(g);
}
}
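// Worked example for the LE helpers above: a little-endian file stores
// the value 0x04030201 as bytes 01 02 03 04. The manual swap puts the
// raw load into network (big-endian) order, and ntohl/Grid_ntohll then
// return 0x04030201 in host order on either endianness.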
template<class vobj,class fobj,class munger> static inline void Uint32Checksum(Lattice<vobj> lat,munger munge,uint32_t &csum)
{
typedef typename vobj::scalar_object sobj;
GridBase *grid = lat._grid ;
std::cout <<GridLogMessage<< "Uint32Checksum "<<norm2(lat)<<std::endl;
sobj siteObj;
fobj fileObj;
csum = 0;
std::vector<int> lcoor(grid->_ndimension);
for(int l=0;l<grid->lSites();l++){
grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
peekLocalSite(siteObj,lat,lcoor);
munge(siteObj,fileObj,csum);
}
grid->GlobalSum(csum);
}
static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
{
for(int i=0;i*sizeof(uint32_t)<buf_size_bytes;i++){
csum=csum+buf[i];
}
}
template<class vobj,class fobj,class munger>
static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
{
typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid;
std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl;
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
// Find the location of each site and send to primary node
// Loop order taken from Chroma; it fixes the ordering now that the
// NERSC doc is no longer available (how short-sighted is that?)
std::ifstream fin(file,std::ios::binary|std::ios::in);
fin.seekg(offset);
Umu = zero;
uint32_t csum=0;
fobj file_object;
sobj munged;
for(int t=0;t<grid->_fdimensions[3];t++){
for(int z=0;z<grid->_fdimensions[2];z++){
for(int y=0;y<grid->_fdimensions[1];y++){
for(int x=0;x<grid->_fdimensions[0];x++){
std::vector<int> site({x,y,z,t});
if ( grid->IsBoss() ) {
fin.read((char *)&file_object,sizeof(file_object));
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
munge(file_object,munged,csum);
}
// The boss who read the file has their value poked
pokeSite(munged,Umu,site);
}}}}
return csum;
}
template<class vobj,class fobj,class munger>
static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
{
typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid;
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
//////////////////////////////////////////////////
// Serialise through node zero
//////////////////////////////////////////////////
std::cout<< GridLogMessage<< "Serial write I/O "<< file<<std::endl;
std::ofstream fout;
if ( grid->IsBoss() ) {
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
fout.seekp(offset);
}
uint32_t csum=0;
fobj file_object;
sobj unmunged;
for(int t=0;t<grid->_fdimensions[3];t++){
for(int z=0;z<grid->_fdimensions[2];z++){
for(int y=0;y<grid->_fdimensions[1];y++){
for(int x=0;x<grid->_fdimensions[0];x++){
std::vector<int> site({x,y,z,t});
// peek & write
peekSite(unmunged,Umu,site);
munge(unmunged,file_object,csum);
if ( grid->IsBoss() ) {
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
fout.write((char *)&file_object,sizeof(file_object));
}
}}}}
return csum;
}
template<class vobj,class fobj,class munger>
static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
{
typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid;
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
// Take into account the block size of parallel file systems; we want
// about 4-16MB chunks.
// Ideally one reader/writer per xy plane and read these contiguously
// with comms from nominated I/O nodes.
std::ifstream fin;
int nd = grid->_ndimension;
std::vector<int> parallel(nd,1);
std::vector<int> ioproc (nd);
std::vector<int> start(nd);
std::vector<int> range(nd);
for(int d=0;d<nd;d++){
assert(grid->CheckerBoarded(d) == 0);
}
uint64_t slice_vol = 1;
int IOnode = 1;
for(int d=0;d<grid->_ndimension;d++) {
if ( d==0 ) parallel[d] = 0;
if (parallel[d]) {
range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d];
} else {
range[d] = grid->_gdimensions[d];
start[d] = 0;
ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
}
slice_vol = slice_vol * range[d];
}
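// Worked example (assumed layout): a 16^4 global volume on a 1x2x2x2
// processor grid, with d==0 serialised, gives range = {16,8,8,8} and
// slice_vol = 16*8*8*8; every rank has _processor_coor[0]==0, so all
// eight ranks act as IOnodes, each reading its own x-complete subslice.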
{
uint32_t tmp = IOnode;
grid->GlobalSum(tmp);
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d];
if( d< grid->_ndimension-1 )
std::cout<< " x ";
}
std::cout << std::endl;
}
int myrank = grid->ThisRank();
int iorank = grid->RankFromProcessorCoor(ioproc);
if ( IOnode ) {
fin.open(file,std::ios::binary|std::ios::in);
}
//////////////////////////////////////////////////////////
// Find the location of each site and send to primary node
// Loop order taken from Chroma; it fixes the ordering now that the
// NERSC doc is no longer available (how short-sighted is that?)
//////////////////////////////////////////////////////////
Umu = zero;
uint32_t csum=0;
fobj fileObj;
sobj siteObj;
// need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd);
std::vector<int> lsite(nd);
std::vector<int> iosite(nd);
grid->CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site
}
/////////////////////////
// Get the rank of owner of data
/////////////////////////
int rank, o_idx,i_idx, g_idx;
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
grid->GlobalCoorToGlobalIndex(gsite,g_idx);
////////////////////////////////
// iorank reads from the seek
////////////////////////////////
if (myrank == iorank) {
fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj));
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
munge(fileObj,siteObj,csum);
if ( rank != myrank ) {
grid->SendTo((void *)&siteObj,rank,sizeof(siteObj));
} else {
pokeLocalSite(siteObj,Umu,lsite);
}
} else {
if ( myrank == rank ) {
grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj));
pokeLocalSite(siteObj,Umu,lsite);
}
}
grid->Barrier(); // necessary?
}
grid->GlobalSum(csum);
return csum;
}
//////////////////////////////////////////////////////////
// Parallel writer
//////////////////////////////////////////////////////////
template<class vobj,class fobj,class munger>
static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
{
typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid;
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
int nd = grid->_ndimension;
for(int d=0;d<nd;d++){
assert(grid->CheckerBoarded(d) == 0);
}
std::vector<int> parallel(nd,1);
std::vector<int> ioproc (nd);
std::vector<int> start(nd);
std::vector<int> range(nd);
uint64_t slice_vol = 1;
int IOnode = 1;
for(int d=0;d<grid->_ndimension;d++) {
if ( d==0 ) parallel[d] = 0;
if (parallel[d]) {
range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d];
} else {
range[d] = grid->_gdimensions[d];
start[d] = 0;
ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
}
slice_vol = slice_vol * range[d];
}
{
uint32_t tmp = IOnode;
grid->GlobalSum(tmp);
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d];
if( d< grid->_ndimension-1 )
std::cout<< " x ";
}
std::cout << std::endl;
}
int myrank = grid->ThisRank();
int iorank = grid->RankFromProcessorCoor(ioproc);
// Take into account the block size of parallel file systems; we want
// about 4-16MB chunks.
// Ideally one reader/writer per xy plane and read these contiguously
// with comms from nominated I/O nodes.
std::ofstream fout;
if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out);
//////////////////////////////////////////////////////////
// Find the location of each site and send to primary node
// Loop order taken from Chroma; it fixes the ordering now that the
// NERSC doc is no longer available (how short-sighted is that?)
//////////////////////////////////////////////////////////
uint32_t csum=0;
fobj fileObj;
sobj siteObj;
// need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd);
std::vector<int> lsite(nd);
std::vector<int> iosite(nd);
grid->CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site
}
/////////////////////////
// Get the rank of owner of data
/////////////////////////
int rank, o_idx,i_idx, g_idx;
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
grid->GlobalCoorToGlobalIndex(gsite,g_idx);
////////////////////////////////
// iorank writes from the seek
////////////////////////////////
if (myrank == iorank) {
if ( rank != myrank ) {
grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj));
} else {
peekLocalSite(siteObj,Umu,lsite);
}
munge(siteObj,fileObj,csum);
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
fout.seekp(offset+g_idx*sizeof(fileObj));
fout.write((char *)&fileObj,sizeof(fileObj));
} else {
if ( myrank == rank ) {
peekLocalSite(siteObj,Umu,lsite);
grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj));
}
}
grid->Barrier(); // necessary? could rate-throttle instead, e.g. every 16 packets
}
grid->GlobalSum(csum);
return csum;
}
};
}
#endif

View File

@ -7,57 +7,23 @@
#include <fstream>
#include <map>
#ifdef HAVE_ENDIAN_H
#include <endian.h>
#endif
#include <arpa/inet.h>
// 64bit endian swap is a portability pain
#ifndef __has_builtin // Optional of course.
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
#endif
#if HAVE_DECL_BE64TOH
#undef Grid_ntohll
#define Grid_ntohll be64toh
#endif
#if HAVE_DECL_NTOHLL
#undef Grid_ntohll
#define Grid_ntohll ntohll
#endif
#ifndef Grid_ntohll
#if BYTE_ORDER == BIG_ENDIAN
#define Grid_ntohll(A) (A)
#else
#if __has_builtin(__builtin_bswap64)
#define Grid_ntohll(A) __builtin_bswap64(A)
#else
#error
#endif
#endif
#endif
#include <unistd.h>
#include <sys/utsname.h>
#include <pwd.h>
namespace Grid {
namespace QCD {
using namespace QCD;
using namespace Grid;
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
@ -86,50 +52,173 @@ class NerscField {
};
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
inline void NerscGrid(GridBase *grid,NerscField &header)
{
assert(grid->_ndimension==4);
for(int d=0;d<4;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<4;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
template<class GaugeField>
inline void NerscStatistics(GaugeField & data,NerscField &header)
{
header.link_trace=Grid::QCD::WilsonLoops<GaugeField>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<GaugeField>::avgPlaquette(data);
}
inline void NerscMachineCharacteristics(NerscField &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
// When
std::time_t t = std::time(nullptr);
std::tm tm = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
{
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
}
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
for(int mu=0;mu<4;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
}
}
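// The third row is the conjugated cross product of the stored two:
// row2 = (row0 x row1)^*. E.g. for rows (1,0,0) and (0,1,0) the cross
// product is (0,0,1), fixed under conjugation, recovering the identity.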
template<class fobj,class sobj>
struct NerscSimpleMunger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
for(int mu=0;mu<4;mu++){
for(int i=0;i<3;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
};
};
template<class fobj,class sobj>
struct NerscSimpleUnmunger{
void operator() (sobj &in,fobj &out,uint32_t &csum){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<Nc;i++){
for(int j=0;j<Nc;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
};
};
template<class fobj,class sobj>
struct Nersc3x2munger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
reconstruct3(out);
}
};
template<class fobj,class sobj>
struct Nersc3x2unmunger{
void operator() (sobj &in,fobj &out,uint32_t &csum){
for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
}
};
////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; compute header offset for payload
////////////////////////////////////////////////////////////////////////////////
inline unsigned int writeNerscHeader(NerscField &field,std::string file)
{
std::ofstream fout(file,std::ios::out);
class NerscIO : public BinaryIO {
public:
static inline unsigned int writeHeader(NerscField &field,std::string file)
{
std::ofstream fout(file,std::ios::out);
fout.seekp(0,std::ios::beg);
fout << "BEGIN_HEADER" << std::endl;
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
fout << "DATATYPE = " << field.data_type << std::endl;
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
fout.seekp(0,std::ios::beg);
fout << "BEGIN_HEADER" << std::endl;
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
fout << "DATATYPE = " << field.data_type << std::endl;
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
for(int i=0;i<4;i++){
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
}
// just to keep the space and write it later
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
for(int i=0;i<4;i++){
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
}
fout << "CHECKSUM = "<< std::hex << std::setw(16) << 0 << field.checksum << std::endl;
for(int i=0;i<4;i++){
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
}
// just to keep the space and write it later
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
for(int i=0;i<4;i++){
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
}
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
fout << "CREATOR = " << field.creator << std::endl;
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
fout << "CREATION_DATE = " << field.creation_date << std::endl;
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
fout << "END_HEADER" << std::endl;
field.data_start = fout.tellp();
return field.data_start;
fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::endl;
fout << std::dec;
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
fout << "CREATOR = " << field.creator << std::endl;
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
fout << "CREATION_DATE = " << field.creation_date << std::endl;
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
fout << "END_HEADER" << std::endl;
field.data_start = fout.tellp();
return field.data_start;
}
// A little helper
inline void removeWhitespace(std::string &key)
{
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
}
// for the header-reader
inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
{
int offset=0;
std::map<std::string,std::string> header;
@ -163,7 +252,6 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
//////////////////////////////////////////////////
// chomp the values
//////////////////////////////////////////////////
field.hdr_version = header["HDR_VERSION"];
field.data_type = header["DATATYPE"];
field.storage_format = header["STORAGE_FORMAT"];
@ -200,314 +288,21 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
}
//////////////////////////////////////////////////////////////////////
// Utilities
//////////////////////////////////////////////////////////////////////
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
for(int mu=0;mu<4;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
}
}
void inline be32toh_v(void *file_object,uint32_t bytes)
{
uint32_t * f = (uint32_t *)file_object;
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
f[i] = ntohl(f[i]);
}
}
void inline le32toh_v(void *file_object,uint32_t bytes)
{
uint32_t *fp = (uint32_t *)file_object;
uint32_t f;
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
f = fp[i];
// got network order and the network to host
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
fp[i] = ntohl(f);
}
}
void inline be64toh_v(void *file_object,uint32_t bytes)
{
uint64_t * f = (uint64_t *)file_object;
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
f[i] = Grid_ntohll(f[i]);
}
}
void inline le64toh_v(void *file_object,uint32_t bytes)
{
uint64_t *fp = (uint64_t *)file_object;
uint64_t f,g;
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
f = fp[i];
// got network order and the network to host
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
g = g << 32;
f = f >> 32;
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
fp[i] = ntohl(g);
}
}
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size,uint32_t &csum)
{
for(int i=0;i*sizeof(uint32_t)<buf_size;i++){
csum=csum+buf[i];
}
}
template<class fobj,class sobj>
struct NerscSimpleMunger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
for(int mu=0;mu<4;mu++){
for(int i=0;i<3;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
};
};
template<class fobj,class sobj>
struct NerscSimpleUnmunger{
void operator() (sobj &in,fobj &out,uint32_t &csum){
for(int mu=0;mu<4;mu++){
for(int i=0;i<3;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
};
};
template<class fobj,class sobj>
struct Nersc3x2munger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
reconstruct3(out);
}
};
template<class fobj,class sobj>
struct Nersc3x2unmunger{
void operator() (sobj &in,fobj &out,uint32_t &csum){
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}
}
};
////////////////////////////////////////////////////////////////////////////
// Template wizardry to map types to strings for NERSC in an extensible way
////////////////////////////////////////////////////////////////////////////
template<class vobj> struct NerscDataType {
static void DataType (std::string &str) { str = std::string("4D_BINARY_UNKNOWN"); };
static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG"); };
};
template<> struct NerscDataType<iColourMatrix<ComplexD> > {
static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG");};
};
template<> struct NerscDataType<iColourMatrix<ComplexF> > {
static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
static void FloatingPoint(std::string &str) { str = std::string("IEEE32BIG");};
};
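// Hypothetical extension, not in this commit: a new on-disk type needs only
// one more specialisation. The type and tag names below are invented purely
// for illustration.
template<> struct NerscDataType<iSomeNewMatrix<ComplexD> > {
  static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_NEW"); };
  static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG");};
};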
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
/*
template<class vobj> inline uint32_t NerscChecksum(Lattice<vobj> & data)
{
uint32_t sum;
for(int ss=0;ss<data._grid->Osites();ss++){
uint32_t *iptr = (uint32_t *)& data._odata[0] ;
for(int i=0;i<sizeof(vobj);i+=sizeof(uint32_t)){
sum=sum+iptr[i];
}
}
data._grid->globalSum(sum);
return sum;
}
*/
template<class vobj> inline void NerscPhysicalCharacteristics(Lattice<vobj> & data,NerscField &header)
{
header.data_type = NerscDataType<vobj>::DataType;
header.floating_point = NerscDataType<vobj>::FloatingPoint;
return;
}
template<> inline void NerscPhysicalCharacteristics(LatticeGaugeField & data,NerscField &header)
{
NerscDataType<decltype(data._odata[0])>::DataType(header.data_type);
NerscDataType<decltype(data._odata[0])>::FloatingPoint(header.floating_point);
header.link_trace=1.0;
header.plaquette =1.0;
}
template<class vobj> inline void NerscStatisics(Lattice<vobj> & data,NerscField &header)
{
assert(data._grid->_ndimension==4);
for(int d=0;d<4;d++)
header.dimension[d] = data._grid->_fdimensions[d];
// compute checksum and any physical properties contained for this type
// header.checksum = NerscChecksum(data);
NerscPhysicalCharacteristics(data,header);
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now the meat: the object readers
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj,class sobj,class fobj,class munger>
inline void readNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,std::string &format)
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header);
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
// Find the location of each site and send to primary node
// for(int site=0; site < Layout::vol(); ++site){
// multi1d<int> coord = crtesn(site, Layout::lattSize());
// for(int dd=0; dd<Nd; dd++){ /* dir */
// cfg_in.readArray(su3_buffer, float_size, mat_size);
//
// Above from Chroma; defines the loop order now that the NERSC doc is no
// longer available (how short-sighted is that?)
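// Restatement (not part of the commit): the on-disk layout is x-fastest
// lexicographic, so with the loop nest below the record for site (x,y,z,t)
// sits at byte offset
//
//   data_start + sizeof(fobj) * ( x + Lx*( y + Ly*( z + Lz*t ) ) )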
{
std::ifstream fin(file,std::ios::binary|std::ios::in);
fin.seekg(offset);
Umu = zero;
uint32_t csum=0;
fobj file_object;
sobj munged;
for(int t=0;t<grid->_fdimensions[3];t++){
for(int z=0;z<grid->_fdimensions[2];z++){
for(int y=0;y<grid->_fdimensions[1];y++){
for(int x=0;x<grid->_fdimensions[0];x++){
std::vector<int> site({x,y,z,t});
if ( grid->IsBoss() ) {
fin.read((char *)&file_object,sizeof(file_object));
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
munge(file_object,munged,csum);
}
// The boss who read the file has their value poked
pokeSite(munged,Umu,site);
}}}}
}
}
template<class vobj,class sobj,class fobj,class munger>
inline void writeNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
int sequence,double lt,double pl)
{
GridBase *grid = Umu._grid;
NerscField header;
//////////////////////////////////////////////////
// First write the header; this is in wrong place
//////////////////////////////////////////////////
assert(grid->_ndimension == 4);
for(int d=0;d<4;d++){
header.dimension[d]=grid->_fdimensions[d];
header.boundary [d]=std::string("PERIODIC");;
}
header.hdr_version=std::string("WHATDAHECK");
// header.storage_format=storage_format<vobj>::string; // use template specialisation
// header.data_type=data_type<vobj>::string;
header.storage_format=std::string("debug");
header.data_type =std::string("debug");
//FIXME; use template specialisation to fill these out
header.link_trace =lt;
header.plaquette =pl;
header.checksum =0;
//
header.sequence_number =sequence;
header.ensemble_id =std::string("UKQCD");
header.ensemble_label =std::string("UKQCD");
header.creator =std::string("Tadahito");
header.creator_hardware=std::string("BlueGene/Q");
header.creation_date =std::string("AnnoDomini");
header.archive_date =std::string("AnnoDomini");
header.floating_point =std::string("IEEE64BIG");
// header.data_start=;
// unsigned int checksum;
//////////////////////////////////////////////////
// Now write the body
//////////////////////////////////////////////////
{
std::ofstream fout(file,std::ios::binary|std::ios::out);
fout.seekp(offset);
Umu = zero;
uint32_t csum=0;
fobj file_object;
sobj unmunged;
for(int t=0;t<grid->_fdimensions[3];t++){
for(int z=0;z<grid->_fdimensions[2];z++){
for(int y=0;y<grid->_fdimensions[1];y++){
for(int x=0;x<grid->_fdimensions[0];x++){
std::vector<int> site({x,y,z,t});
peekSite(unmunged,Umu,site);
munge(unmunged,file_object,csum);
// broadcast & insert
fout.write((char *)&file_object,sizeof(file_object));
}}}}
}
}
inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std::string file)
{
GridBase *grid = Umu._grid;
int offset = readNerscHeader(file,Umu._grid,header);
NerscField clone(header);
std::string format(header.floating_point);
@ -516,48 +311,106 @@ inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
uint32_t csum;
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
if ( ieee32 || ieee32big ) {
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3F>
(Umu,file,
Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(),
offset,format);
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
}
if ( ieee64 || ieee64big ) {
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3D>
(Umu,file,
Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),
offset,format);
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
}
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
if ( ieee32 || ieee32big ) {
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixF>
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
}
if ( ieee64 || ieee64big ) {
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixD>
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
}
} else {
assert(0);
}
NerscStatistics<GaugeField>(Umu,clone);
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(csum == header.checksum );
std::cout<<GridLogMessage <<"Read NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
}
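// Minimal usage sketch, not part of the commit; assumes the enclosing class
// is the NerscIO reader and that a 4d GridCartesian* UGrid already exists.
inline void exampleReadNersc(GridBase *UGrid)
{
  NerscField header;
  LatticeGaugeField Umu(UGrid);
  NerscIO::readConfiguration(Umu,header,std::string("ckpoint_lat.1000"));
  std::cout<<GridLogMessage<<"plaq "<<header.plaquette<<" link trace "<<header.link_trace<<std::endl;
}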
template<class vobj>
inline void writeNerscConfiguration(Lattice<vobj> &Umu,NerscField &header,std::string file)
template<class vsimd>
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
{
GridBase &grid = Umu._grid;
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
// Following should become arguments
NerscField header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D;
typedef LorentzColourMatrixF fobj3f;
typedef LorentzColour2x3F fobj2f;
GridBase *grid = Umu._grid;
NerscGrid(grid,header);
NerscStatistics<GaugeField>(Umu,header);
NerscMachineCharacteristics(header);
uint32_t csum;
int offset;
NerscStatisics(Umu,header);
if ( two_row ) {
int offset = writeNerscHeader(header,file);
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE");
Nersc3x2unmunger<fobj2D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
writeNerscObject(Umu,NerscSimpleMunger<vobj,vobj>(),offset);
}
std::string file1 = file+"para";
int offset1 = writeHeader(header,file1);
int csum1=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file1,munge,offset,header.floating_point);
std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl;
std::cout << GridLogMessage <<std::hex<< " TESTING PARALLEL WRITE csums " << csum1 << " "<< csum << std::endl;
std::cout << std::dec;
}
assert(offset1==offset);
assert(csum1==csum);
} else {
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3X3");
NerscSimpleUnmunger<fobj3D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
}
std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
}
};
}}
#endif
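The companion write call would then round-trip the field; a sketch under the same assumed names (two_row selects the compressed 4D_SU3_GAUGE format, bits32 would select 32-bit output):

int two_row = 1, bits32 = 0;
NerscIO::writeConfiguration(Umu,std::string("ckpoint_lat.out"),two_row,bits32);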

lib/pugixml/README.md (new file, 44 lines)

@ -0,0 +1,44 @@
pugixml [![Build Status](https://travis-ci.org/zeux/pugixml.svg?branch=master)](https://travis-ci.org/zeux/pugixml) [![Build status](https://ci.appveyor.com/api/projects/status/9hdks1doqvq8pwe7/branch/master?svg=true)](https://ci.appveyor.com/project/zeux/pugixml)
=======
pugixml is a C++ XML processing library, which consists of a DOM-like interface with rich traversal/modification
capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0
implementation for complex data-driven tree queries. Full Unicode support is also available, with Unicode interface
variants and conversions between different Unicode encodings (which happen automatically during parsing/saving).
pugixml is used by a lot of projects, both open-source and proprietary, for performance and easy-to-use interface.
## Documentation
Documentation for the current release of pugixml is available on-line as two separate documents:
* [Quick-start guide](http://pugixml.org/docs/quickstart.html), that aims to provide enough information to start using the library;
* [Complete reference manual](http://pugixml.org/docs/manual.html), that describes all features of the library in detail.
You're advised to start with the quick-start guide; however, many important library features are either not described in it at all or only mentioned briefly; if you require more information you should read the complete manual.
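As a quick flavour of the interface (a sketch only; the file and node names here are invented):

    #include "pugixml.hpp"
    #include <iostream>

    int main() {
      pugi::xml_document doc;
      pugi::xml_parse_result result = doc.load_file("params.xml");
      if (!result) { std::cerr << result.description() << "\n"; return 1; }
      // Walk <run><step .../> children and print an attribute.
      for (pugi::xml_node step : doc.child("run").children("step"))
        std::cout << step.attribute("name").value() << "\n";
    }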
## License
This library is available to anybody free of charge, under the terms of MIT License:
Copyright (c) 2006-2015 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

lib/pugixml/pugiconfig.hpp (new file, 71 lines)

@ -0,0 +1,71 @@
/**
* pugixml parser - version 1.6
* --------------------------------------------------------
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
* of this file.
*
* This work is based on the pugxml parser, which is:
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
*/
#ifndef HEADER_PUGICONFIG_HPP
#define HEADER_PUGICONFIG_HPP
// Uncomment this to enable wchar_t mode
// #define PUGIXML_WCHAR_MODE
// Uncomment this to disable XPath
// #define PUGIXML_NO_XPATH
// Uncomment this to disable STL
// #define PUGIXML_NO_STL
// Uncomment this to disable exceptions
// #define PUGIXML_NO_EXCEPTIONS
// Set this to control attributes for public classes/functions, i.e.:
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
// Tune these constants to adjust memory-related behavior
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
// Uncomment this to switch to header-only version
// #define PUGIXML_HEADER_ONLY
// Uncomment this to enable long long support
// #define PUGIXML_HAS_LONG_LONG
#endif
/**
* Copyright (c) 2006-2015 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

lib/pugixml/pugixml.cc (new file, 12485 lines)

File diff suppressed because it is too large.

lib/pugixml/pugixml.h (new file, 1400 lines)

File diff suppressed because it is too large.

lib/pugixml/readme.txt (new file, 52 lines)

@ -0,0 +1,52 @@
pugixml 1.6 - an XML processing library
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
Report bugs and download new versions at http://pugixml.org/
This is the distribution of pugixml, which is a C++ XML processing library,
which consists of a DOM-like interface with rich traversal/modification
capabilities, an extremely fast XML parser which constructs the DOM tree from
an XML file/buffer, and an XPath 1.0 implementation for complex data-driven
tree queries. Full Unicode support is also available, with Unicode interface
variants and conversions between different Unicode encodings (which happen
automatically during parsing/saving).
The distribution contains the following folders:
contrib/ - various contributions to pugixml
docs/ - documentation
docs/samples - pugixml usage examples
docs/quickstart.html - quick start guide
docs/manual.html - complete manual
scripts/ - project files for IDE/build systems
src/ - header and source files
readme.txt - this file.
This library is distributed under the MIT License:
Copyright (c) 2006-2015 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

lib/qcd/QCD.h

@ -19,15 +19,25 @@ namespace QCD {
static const int Nd=4;
static const int Nhs=2; // half spinor
static const int Nds=8; // double stored gauge field
static const int Ngp=2; // gparity index range
//////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour
//////////////////////////////////////////////////////////////////////////////
static const int ColourIndex = 1;
static const int SpinIndex = 2;
static const int LorentzIndex= 3;
static const int ColourIndex = 2;
static const int SpinIndex = 1;
static const int LorentzIndex= 0;
// Useful traits is this a spin index
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
const int SpinorIndex = 2;
template<typename T> struct isSpinor {
static const bool value = (SpinorIndex==T::TensorLevel);
};
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
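// Hypothetical illustration, not in this commit: the aliases gate overloads
// through the usual default-template-argument SFINAE idiom.
template<class T, IfSpinor<T> = 0> void onSpinor (const T &t) { /* spinor-only path */ }
template<class T, IfNotSpinor<T> = 0> void onNonSpinor(const T &t) { /* everything else */ }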
// ChrisK very keen to add extra space for Gparity doubling.
//
@ -49,6 +59,9 @@ namespace QCD {
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iSpinMatrix<ComplexF > SpinMatrixF;
@ -140,7 +153,7 @@ namespace QCD {
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
@ -148,7 +161,7 @@ namespace QCD {
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
@ -237,6 +250,8 @@ namespace QCD {
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
// Uhgg... typing this hurt ;)
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
typedef Lattice<vColourVector> LatticeStaggeredFermion;
@ -252,47 +267,47 @@ namespace QCD {
//////////////////////////////////////////////////////////////////////////////
//spin
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(peekIndex<SpinIndex>(rhs,0))
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return peekIndex<SpinIndex>(rhs,i);
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(peekIndex<SpinIndex>(rhs,0,0))
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return peekIndex<SpinIndex>(rhs,i,j);
return PeekIndex<SpinIndex>(rhs,i,j);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(peekIndex<SpinIndex>(rhs,0))
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
{
return peekIndex<SpinIndex>(rhs,i);
return PeekIndex<SpinIndex>(rhs,i);
}
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(peekIndex<SpinIndex>(rhs,0,0))
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
{
return peekIndex<SpinIndex>(rhs,i,j);
return PeekIndex<SpinIndex>(rhs,i,j);
}
//colour
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(peekIndex<ColourIndex>(rhs,0))
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return peekIndex<ColourIndex>(rhs,i);
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(peekIndex<ColourIndex>(rhs,0,0))
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return peekIndex<ColourIndex>(rhs,i,j);
return PeekIndex<ColourIndex>(rhs,i,j);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(peekIndex<ColourIndex>(rhs,0))
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
{
return peekIndex<ColourIndex>(rhs,i);
return PeekIndex<ColourIndex>(rhs,i);
}
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(peekIndex<ColourIndex>(rhs,0,0))
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
{
return peekIndex<ColourIndex>(rhs,i,j);
return PeekIndex<ColourIndex>(rhs,i,j);
}
//lorentz
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(peekIndex<LorentzIndex>(rhs,0))
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return peekIndex<LorentzIndex>(rhs,i);
return PeekIndex<LorentzIndex>(rhs,i);
}
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(peekIndex<LorentzIndex>(rhs,0))
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
{
return peekIndex<LorentzIndex>(rhs,i);
return PeekIndex<LorentzIndex>(rhs,i);
}
//////////////////////////////////////////////
@ -303,35 +318,35 @@ namespace QCD {
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
int i)
{
pokeIndex<ColourIndex>(lhs,rhs,i);
PokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
{
pokeIndex<ColourIndex>(lhs,rhs,i,j);
PokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
int i)
{
pokeIndex<SpinIndex>(lhs,rhs,i);
PokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
{
pokeIndex<SpinIndex>(lhs,rhs,i,j);
PokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
int i)
{
pokeIndex<LorentzIndex>(lhs,rhs,i);
PokeIndex<LorentzIndex>(lhs,rhs,i);
}
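// Usage sketch, not part of the commit: extract the mu=2 link field,
// transform it with any colour-matrix expression, and re-insert it.
inline void examplePeekPoke(LatticeGaugeField &Umu)
{
  LatticeColourMatrix Uz(Umu._grid);
  Uz = peekLorentz(Umu,2); // PeekIndex<LorentzIndex> under the hood
  Uz = adj(Uz);
  pokeLorentz(Umu,Uz,2);   // PokeIndex<LorentzIndex> under the hood
}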
//////////////////////////////////////////////
@ -411,6 +426,11 @@ namespace QCD {
#include <qcd/utils/LinalgUtils.h>
#include <qcd/utils/CovariantCshift.h>
#include <qcd/utils/WilsonLoops.h>
#include <qcd/utils/SUn.h>
#include <qcd/action/Actions.h>
#include <qcd/hmc/integrators/Integrator.h>
#include <qcd/hmc/integrators/Integrator_algorithm.h>
#include <qcd/hmc/HMC.h>
#endif

lib/qcd/action/ActionBase.h (new file, 58 lines)

@ -0,0 +1,58 @@
#ifndef QCD_ACTION_BASE
#define QCD_ACTION_BASE
namespace Grid {
namespace QCD{
template<class GaugeField>
class Action {
public:
// Boundary conditions? // Heatbath?
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions
virtual RealD S (const GaugeField &U) = 0; // evaluate the action
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
virtual ~Action() {};
};
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
/*
template<class GaugeField, class FermionField>
class PseudoFermionAction : public Action<GaugeField> {
public:
FermionField Phi;
GridParallelRNG &pRNG;
GridBase &Grid;
PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) {
};
virtual void refresh(const GaugeField &gauge) {
gaussian(Phi,pRNG);
};
};
*/
template<class GaugeField> struct ActionLevel{
public:
typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
int multiplier;
std::vector<ActPtr> actions;
ActionLevel(int mul = 1) : multiplier(mul) {
assert (mul > 0);
};
void push_back(ActPtr ptr){
actions.push_back(ptr);
}
};
template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >;
}}
#endif
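As a flavour of the pattern this interface implies, a hypothetical sketch (all names below are illustrative, not part of the commit): the smallest conforming action, and how levels compose into a multi-timescale ActionSet.

template<class GaugeField>
class TrivialAction : public Action<GaugeField> {
public:
  virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // nothing to sample
  virtual RealD S (const GaugeField &U) { return 0.0; };               // flat action
  virtual void deriv(const GaugeField &U,GaugeField & dSdU) { dSdU = zero; }; // no force
};

// Two levels; the multiplier presumably sets the relative step count per level.
// ActionLevel<LatticeGaugeField> Coarse(1); Coarse.push_back(&slowAction);
// ActionLevel<LatticeGaugeField> Fine(4);   Fine.push_back(&fastAction);
// ActionSet<LatticeGaugeField>   FullSet;
// FullSet.push_back(Coarse);     FullSet.push_back(Fine);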

lib/qcd/action/ActionParams.h (new file, 29 lines)

@ -0,0 +1,29 @@
#ifndef GRID_QCD_ACTION_PARAMS_H
#define GRID_QCD_ACTION_PARAMS_H
namespace Grid {
namespace QCD {
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
std::vector<int> twists;
};
struct WilsonImplParams { };
struct OneFlavourRationalParams {
RealD lo;
RealD hi;
int MaxIter; // Vector?
RealD tolerance; // Vector?
int degree=10;
int precision=64;
OneFlavourRationalParams (RealD _lo,RealD _hi,int _maxit,RealD tol=1.0e-8,int _degree = 10,int _precision=64) :
lo(_lo), hi(_hi), MaxIter(_maxit), tolerance(tol), degree(_degree), precision(_precision)
{};
};
}}
#endif
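A construction sketch (values purely illustrative): a rational approximation over the spectral window [1.0e-4, 64.0], at most 10000 solver iterations at 1.0e-8 tolerance, keeping the default degree 10 and 64-bit precision:

OneFlavourRationalParams rat_params(1.0e-4, 64.0, 10000, 1.0e-8);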

lib/qcd/action/Actions.h

@ -6,68 +6,173 @@
// are separating the concept of the operator from that of action.
//
// The FermAction contains methods to create
//
// * Linear operators (Hermitian and non-hermitian) .. my LinearOperator
// * System solvers (Hermitian and non-hermitian) .. my OperatorFunction
// * MultiShift System solvers (Hermitian and non-hermitian) .. my OperatorFunction
////////////////////////////////////////////
// Abstract base interface
////////////////////////////////////////////
#include <qcd/action/fermion/FermionOperator.h>
#include <qcd/action/ActionBase.h>
#include <qcd/action/ActionParams.h>
////////////////////////////////////////////
// Utility functions
////////////////////////////////////////////
#include <qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
#include <qcd/action/fermion/FermionOperatorImpl.h>
#include <qcd/action/fermion/FermionOperator.h>
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
////////////////////////////////////////////
// 4D formulations
// Gauge Actions
////////////////////////////////////////////
#include <qcd/action/fermion/WilsonFermion.h>
#include <qcd/action/gauge/WilsonGaugeAction.h>
namespace Grid {
namespace QCD {
typedef WilsonGaugeAction<LatticeGaugeField> WilsonGaugeActionR;
typedef WilsonGaugeAction<LatticeGaugeFieldF> WilsonGaugeActionF;
typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
}}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Explicit template instantiation is still required in the .cc files
//
// - CayleyFermion5D.cc
// - PartialFractionFermion5D.cc
// - WilsonFermion5D.cc
// - WilsonKernelsHand.cc
// - ContinuedFractionFermion5D.cc
// - WilsonFermion.cc
// - WilsonKernels.cc
//
// The explicit instantiation is only avoidable if we move this source to headers and end up with include/parse/recompile
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
////////////////////////////////////////////////////////////////////////////////////////////////////
#define FermOpTemplateInstantiate(A) \
template class A<WilsonImplF>; \
template class A<WilsonImplD>;
// template class A<GparityWilsonImplF>; \
// template class A<GparityWilsonImplD>;
////////////////////////////////////////////
// Fermion operators / actions
////////////////////////////////////////////
#include <qcd/action/fermion/WilsonFermion.h> // 4d wilson like
#include <qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
//#include <qcd/action/fermion/CloverFermion.h>
////////////////////////////////////////////
// 5D formulations...
////////////////////////////////////////////
#include <qcd/action/fermion/WilsonFermion5D.h> // used by all 5d overlap types
//////////
// Cayley
//////////
#include <qcd/action/fermion/CayleyFermion5D.h>
#include <qcd/action/fermion/CayleyFermion5D.h> // Cayley types
#include <qcd/action/fermion/DomainWallFermion.h>
#include <qcd/action/fermion/DomainWallFermion.h>
#include <qcd/action/fermion/MobiusFermion.h>
#include <qcd/action/fermion/ScaledShamirFermion.h>
#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
#include <qcd/action/fermion/MobiusZolotarevFermion.h>
#include <qcd/action/fermion/ShamirZolotarevFermion.h>
#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
//////////////////////
// Continued fraction
//////////////////////
#include <qcd/action/fermion/ContinuedFractionFermion5D.h>
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
//////////////////////
// Partial fraction
//////////////////////
#include <qcd/action/fermion/PartialFractionFermion5D.h>
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
////////////////////////////////////////////////////////////////////////////////////////////////////
// More maintainable to keep the following typedef list centralised, as more "impl" targets
// are added, (e.g. extension for gparity, half precision project in comms etc..)
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cayley 5d
namespace Grid {
namespace QCD {
typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
typedef MobiusZolotarevFermion<WilsonImplR> MobiusZolotarevFermionR;
typedef MobiusZolotarevFermion<WilsonImplF> MobiusZolotarevFermionF;
typedef MobiusZolotarevFermion<WilsonImplD> MobiusZolotarevFermionD;
typedef ShamirZolotarevFermion<WilsonImplR> ShamirZolotarevFermionR;
typedef ShamirZolotarevFermion<WilsonImplF> ShamirZolotarevFermionF;
typedef ShamirZolotarevFermion<WilsonImplD> ShamirZolotarevFermionD;
typedef OverlapWilsonCayleyTanhFermion<WilsonImplR> OverlapWilsonCayleyTanhFermionR;
typedef OverlapWilsonCayleyTanhFermion<WilsonImplF> OverlapWilsonCayleyTanhFermionF;
typedef OverlapWilsonCayleyTanhFermion<WilsonImplD> OverlapWilsonCayleyTanhFermionD;
typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplR> OverlapWilsonCayleyZolotarevFermionR;
typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplF> OverlapWilsonCayleyZolotarevFermionF;
typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplD> OverlapWilsonCayleyZolotarevFermionD;
// Continued fraction
typedef OverlapWilsonContFracTanhFermion<WilsonImplR> OverlapWilsonContFracTanhFermionR;
typedef OverlapWilsonContFracTanhFermion<WilsonImplF> OverlapWilsonContFracTanhFermionF;
typedef OverlapWilsonContFracTanhFermion<WilsonImplD> OverlapWilsonContFracTanhFermionD;
typedef OverlapWilsonContFracZolotarevFermion<WilsonImplR> OverlapWilsonContFracZolotarevFermionR;
typedef OverlapWilsonContFracZolotarevFermion<WilsonImplF> OverlapWilsonContFracZolotarevFermionF;
typedef OverlapWilsonContFracZolotarevFermion<WilsonImplD> OverlapWilsonContFracZolotarevFermionD;
// Partial fraction
typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplR> OverlapWilsonPartialFractionTanhFermionR;
typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplF> OverlapWilsonPartialFractionTanhFermionF;
typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplD> OverlapWilsonPartialFractionTanhFermionD;
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplR> OverlapWilsonPartialFractionZolotarevFermionR;
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplF> OverlapWilsonPartialFractionZolotarevFermionF;
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplD> OverlapWilsonPartialFractionZolotarevFermionD;
// Gparity cases; partial list until tested
typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
}}
///////////////////////////////////////////////////////////////////////////////
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
///////////////////////////////////////////////////////////////////////////////
#include <qcd/action/fermion/g5HermitianLinop.h>
////////////////////////////////////////
// Pseudo fermion combinations for HMC
////////////////////////////////////////
#include <qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
#include <qcd/action/pseudofermion/TwoFlavour.h>
#include <qcd/action/pseudofermion/TwoFlavourRatio.h>
#include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
#include <qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
//IroIro inserted a general "Nf" param; that could be done here too,
//but it is not clear why, unless moving into large-Nf BSM studies.
//Even there, we don't want the explicit (2) in the power denominator
//when the number of flavours is even, so a further generalised interface
//would be required, though an easy one.
#include <qcd/action/pseudofermion/OneFlavourRational.h>
#include <qcd/action/pseudofermion/OneFlavourRationalRatio.h>
#include <qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
#include <qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
#endif
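With the typedef list above in scope, user code instantiates an operator in one line; a sketch (not part of the commit) with the grids and parameters assumed to exist:

// FGrid/FrbGrid are the 5d grids, UGrid/UrbGrid the 4d ones.
inline void exampleMakeDWF(LatticeGaugeField &Umu,
                           GridCartesian &FGrid,GridRedBlackCartesian &FrbGrid,
                           GridCartesian &UGrid,GridRedBlackCartesian &UrbGrid,
                           RealD mass,RealD M5)
{
  DomainWallFermionR Ddwf(Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5);
}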

lib/qcd/action/fermion/CayleyFermion5D.cc

@ -2,27 +2,27 @@
namespace Grid {
namespace QCD {
CayleyFermion5D::CayleyFermion5D(LatticeGaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5) :
WilsonFermion5D(_Umu,
template<class Impl>
CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_M5),
FourDimRedBlackGrid,_M5,p),
mass(_mass)
{
}
// override multiply
RealD CayleyFermion5D::M (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
LatticeFermion Din(psi._grid);
// Assemble Din
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
// Din = bs psi[s] + cs[s] psi[s+1}
@ -37,11 +37,57 @@ namespace QCD {
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
}
}
}
template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
} else {
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
}
}
}
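// Spelled out (restatement, not part of the commit), with
// P(+/-) = (1 +/- gamma_5)/2, the 5d factor assembled by Meooe5D is
//
//   (D5 psi)_s = b_s psi_s + c_s P- psi_{s+1} + c_s P+ psi_{s-1},
//
// with boundary terms psi_{Ls} -> -m psi_0 and psi_{-1} -> -m psi_{Ls-1};
// MeooeDag5D has P+ and P- exchanged, with the c coefficient taken from the
// neighbouring s slice, as the adjoint requires.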
DW(Din,chi,DaggerNo);
// override multiply
template<class Impl>
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField Din(psi._grid);
// Assemble Din
/*
for(int s=0;s<Ls;s++){
if ( s==0 ) {
// Din = bs psi[s] + cs[s] psi[s+1}
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
// Din+= -mass*cs[s] psi[s+1}
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
} else {
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
}
}
*/
Meooe5D(psi,Din);
this->DW(Din,chi,DaggerNo);
// ((b D_W + D_w hop terms +1) on s-diag
axpby(chi,1.0,1.0,chi,psi);
// Call Mooee??
for(int s=0;s<Ls;s++){
if ( s==0 ){
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
@ -57,20 +103,26 @@ namespace QCD {
return norm2(chi);
}
RealD CayleyFermion5D::Mdag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// Under adjoint
//D1+ D1- P- -> D1+^dag P+ D2-^dag
//D2- P+ D2+ P-D1-^dag D2+dag
LatticeFermion Din(psi._grid);
FermionField Din(psi._grid);
// Apply Dw
DW(psi,Din,DaggerYes);
this->DW(psi,Din,DaggerYes);
Meooe5D(Din,chi);
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
// Collect the terms in DW
// Chi = bs Din[s] + cs[s] Din[s+1}
// Chi+= -mass*cs[s] psi[s+1}
/*
if ( s==0 ) {
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
@ -81,6 +133,10 @@ namespace QCD {
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
}
*/
// FIXME just call MooeeDag??
// Collect the terms indept of DW
if ( s==0 ){
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
@ -99,10 +155,17 @@ namespace QCD {
}
// half checkerboard operations
void CayleyFermion5D::Meooe (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
LatticeFermion tmp(psi._grid);
int Ls=this->Ls;
FermionField tmp(psi._grid);
// Assemble the 5d matrix
Meooe5D(psi,tmp);
#if 0
std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
// tmp = bs psi[s] + cs[s] psi[s+1}
@ -117,24 +180,33 @@ namespace QCD {
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
}
}
std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
#endif
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
DhopEO(tmp,chi,DaggerNo);
this->DhopEO(tmp,chi,DaggerNo);
} else {
DhopOE(tmp,chi,DaggerNo);
this->DhopOE(tmp,chi,DaggerNo);
}
}
void CayleyFermion5D::MeooeDag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
LatticeFermion tmp(psi._grid);
FermionField tmp(psi._grid);
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
DhopEO(psi,tmp,DaggerYes);
this->DhopEO(psi,tmp,DaggerYes);
} else {
DhopOE(psi,tmp,DaggerYes);
this->DhopOE(psi,tmp,DaggerYes);
}
Meooe5D(tmp,chi);
#if 0
std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
// Assemble the 5d matrix
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pplus(chi,beo[s],tmp, -ceo[s+1] ,tmp,s,s+1);
@ -147,10 +219,15 @@ namespace QCD {
axpby_ssp_pminus(chi,1.0 ,chi,-ceo[s-1],tmp,s,s-1);
}
}
std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl;
#endif
}
void CayleyFermion5D::Mooee (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
for (int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
@ -165,8 +242,10 @@ namespace QCD {
}
}
void CayleyFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
LatticeFermion tmp(psi._grid);
template<class Impl>
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls=this->Ls;
FermionField tmp(psi._grid);
// Assemble the 5d matrix
for(int s=0;s<Ls;s++){
if ( s==0 ) {
@ -183,11 +262,13 @@ namespace QCD {
}
}
// Apply 4d dslash fragment
DhopDir(tmp,chi,dir,disp);
this->DhopDir(tmp,chi,dir,disp);
}
void CayleyFermion5D::MooeeDag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
if ( s==0 ) {
@ -203,8 +284,10 @@ namespace QCD {
}
}
void CayleyFermion5D::MooeeInv (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
// Apply (L^{\prime})^{-1}
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
@ -227,8 +310,10 @@ namespace QCD {
}
}
void CayleyFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
// Apply (U^{\prime})^{-dagger}
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
@ -249,16 +334,66 @@ namespace QCD {
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
}
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDeriv(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDeriv(mat,Din,V,dag);
}
};
template<class Impl>
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivOE(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDerivOE(mat,Din,V,dag);
}
};
template<class Impl>
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivEO(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDerivEO(mat,Din,V,dag);
}
};
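// All three derivative routines share one factorisation (restatement, not
// part of the commit): D5 carries no gauge-field dependence, so writing the
// two fermionic legs as X and Y (U and V in the code),
//
//   d/dU [ X^dag Dw D5 Y ]       = X^dag (dDw/dU) (D5 Y)
//   d/dU [ X^dag (Dw D5)^dag Y ] = (D5 X)^dag (dDw^dag/dU) Y
//
// i.e. apply Meooe5D to one leg, then hand the pair to the Wilson DhopDeriv.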
// Tanh
void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
SetCoefficientsZolotarev(1.0,zdata,b,c);
}
//Zolo
void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
int Ls=this->Ls;
///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec)
@ -308,8 +443,8 @@ namespace QCD {
ceo.resize(Ls);
for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-M5));
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i];
}
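// Equivalently (restatement, not part of the commit), folding the Wilson
// diagonal (4 - M5) into the checkerboard coefficients:
//
//   bee_i = a_i ( b_i (4 - M5) + 1 )   cee_i = a_i ( 1 - c_i (4 - M5) )
//   beo_i = a_i b_i                    ceo_i = - a_i c_i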
@ -362,6 +497,8 @@ namespace QCD {
}
}
FermOpTemplateInstantiate(CayleyFermion5D);
}}

lib/qcd/action/fermion/CayleyFermion5D.h

@ -5,25 +5,36 @@ namespace Grid {
namespace QCD {
class CayleyFermion5D : public WilsonFermion5D
template<class Impl>
class CayleyFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// override multiply
virtual RealD M (const LatticeFermion &in, LatticeFermion &out);
virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out);
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operations
virtual void Meooe (const LatticeFermion &in, LatticeFermion &out);
virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out);
virtual void Mooee (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
virtual void Instantiatable(void)=0;
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// Efficient support for multigrid coarsening
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void Meooe5D (const FermionField &in, FermionField &out);
void MeooeDag5D (const FermionField &in, FermionField &out);
// protected:
RealD mass;
@ -48,12 +59,12 @@ namespace Grid {
std::vector<RealD> dee;
// Constructors
CayleyFermion5D(LatticeGaugeField &_Umu,
CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5);
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
protected:
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);

lib/qcd/action/fermion/ContinuedFractionFermion5D.cc

@ -3,20 +3,22 @@
namespace Grid {
namespace QCD {
void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
{
SetCoefficientsZolotarev(1.0/scale,zdata);
}
void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
{
// How to check Ls matches??
// std::cout << Ls << " Ls"<<std::endl;
// std::cout << zdata->n << " - n"<<std::endl;
// std::cout << zdata->da << " -da "<<std::endl;
// std::cout << zdata->db << " -db"<<std::endl;
// std::cout << zdata->dn << " -dn"<<std::endl;
// std::cout << zdata->dd << " -dd"<<std::endl;
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(zdata->db==Ls);// Beta has Ls coeffs
R=(1+this->mass)/(1-this->mass);
@ -39,7 +41,7 @@ namespace Grid {
ZoloHiInv =1.0/zolo_hi;
dw_diag = (4.0-M5)*ZoloHiInv;
dw_diag = (4.0-this->M5)*ZoloHiInv;
See.resize(Ls);
Aee.resize(Ls);
@ -55,17 +57,20 @@ namespace Grid {
See[s] = Aee[s] - 1.0/See[s-1];
}
for(int s=0;s<Ls;s++){
std::cout <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
std::cout<<GridLogMessage <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
}
}
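// Restatement, not part of the commit: the See recursion is the pivot
// sequence of the LDU factorisation of the tridiagonal continued-fraction
// matrix,
//
//   S_0 = A_0 ,   S_s = A_s - 1 / S_{s-1}   (s = 1,...,Ls-1),
//
// assuming the s = 0 case is set before the loop shown in this hunk.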
RealD ContinuedFractionFermion5D::M (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
LatticeFermion D(psi._grid);
int Ls = this->Ls;
DW(psi,D,DaggerNo);
FermionField D(psi._grid);
this->DW(psi,D,DaggerNo);
int sign=1;
for(int s=0;s<Ls;s++){
@ -83,15 +88,20 @@ namespace Grid {
}
return norm2(chi);
}
RealD ContinuedFractionFermion5D::Mdag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
// The rest of matrix is symmetric.
// Can ignore "dag"
return M(psi,chi);
}
void ContinuedFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
int Ls = this->Ls;
this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
@ -102,13 +112,16 @@ namespace Grid {
sign=-sign;
}
}
void ContinuedFractionFermion5D::Meooe (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
} else {
DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
}
int sign=1;
@ -121,12 +134,16 @@ namespace Grid {
sign=-sign;
}
}
void ContinuedFractionFermion5D::MeooeDag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
{
Meooe(psi,chi);
this->Meooe(psi,chi);
}
void ContinuedFractionFermion5D::Mooee (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
@ -144,12 +161,16 @@ namespace Grid {
}
}
void ContinuedFractionFermion5D::MooeeDag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
Mooee(psi,chi);
this->Mooee(psi,chi);
}
void ContinuedFractionFermion5D::MooeeInv (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;
// Apply Linv
axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
for(int s=1;s<Ls;s++){
@ -165,27 +186,88 @@ namespace Grid {
axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
}
}
void ContinuedFractionFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
MooeeInv(psi,chi);
this->MooeeInv(psi,chi);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
} else {
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
}
sign=-sign;
}
this->DhopDerivEO(mat,D,V,DaggerNo);
};
// Constructors
ContinuedFractionFermion5D::ContinuedFractionFermion5D(
LatticeGaugeField &_Umu,
template<class Impl>
ContinuedFractionFermion5D<Impl>::ContinuedFractionFermion5D(
GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5) :
WilsonFermion5D(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5),
RealD _mass,RealD M5,const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
}
FermOpTemplateInstantiate(ContinuedFractionFermion5D);
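FermOpTemplateInstantiate presumably expands to explicit template instantiations over the Impl typedefs introduced later in this diff (a hedged guess at its effect; the macro body itself is not shown here):
template class ContinuedFractionFermion5D<WilsonImplF>;
template class ContinuedFractionFermion5D<WilsonImplD>;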
}
}

View File

@ -5,35 +5,43 @@ namespace Grid {
namespace QCD {
class ContinuedFractionFermion5D : public WilsonFermion5D
template<class Impl>
class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// override multiply
virtual RealD M (const LatticeFermion &in, LatticeFermion &out);
virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out);
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operations
virtual void Meooe (const LatticeFermion &in, LatticeFermion &out);
virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out);
virtual void Mooee (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// virtual void Instantiatable(void)=0;
virtual void Instantiatable(void) =0;
// Efficient support for multigrid coarsening
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
// Constructors
ContinuedFractionFermion5D(LatticeGaugeField &_Umu,
ContinuedFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5);
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
protected:

View File

@ -7,24 +7,27 @@ namespace Grid {
namespace QCD {
class DomainWallFermion : public CayleyFermion5D
template<class Impl>
class DomainWallFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
DomainWallFermion(LatticeGaugeField &_Umu,
DomainWallFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5) :
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
CayleyFermion5D(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
@ -32,9 +35,9 @@ namespace Grid {
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
std::cout << "DomainWallFermion with Ls="<<Ls<<std::endl;
std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter
this->CayleyFermion5D::SetCoefficientsTanh(zdata,1.0,0.0);
this->SetCoefficientsTanh(zdata,1.0,0.0);
Approx::zolotarev_free(zdata);
}

View File

@ -5,16 +5,20 @@ namespace Grid {
namespace QCD {
//////////////////////////////////////////////////////////////////////////////
// Four component fermions
// Should type template the vector and gauge types
// Think about multiple representations
//////////////////////////////////////////////////////////////////////////////
template<class FermionField,class GaugeField>
class FermionOperator : public CheckerBoardedSparseMatrixBase<FermionField>
////////////////////////////////////////////////////////////////
// Allows selection of gauge representation rank, bc's, flavours etc.
// and single/double precision.
////////////////////////////////////////////////////////////////
template<class Impl>
class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>, public Impl
{
public:
INHERIT_IMPL_TYPES(Impl);
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
@ -28,6 +32,8 @@ namespace Grid {
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
// half checkerboard operations
virtual int ConstEE(void) { return 1; }; // clover returns zero as EE depends on gauge field
virtual void Meooe (const FermionField &in, FermionField &out)=0;
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
virtual void Mooee (const FermionField &in, FermionField &out)=0;
@ -39,13 +45,31 @@ namespace Grid {
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
virtual void Mdiag(const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;}; // Clover can override these
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
///////////////////////////////////////////////
// Updates gauge field during HMC
///////////////////////////////////////////////
virtual void ImportGauge(const GaugeField & _U)=0;
};
}
}
#endif

View File

@ -0,0 +1,359 @@
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
namespace Grid {
namespace QCD {
//////////////////////////////////////////////
// Template parameter class constructs that package
// externally controlled Fermion implementation choices
// in orthogonal directions
//
// Ultimately need Impl to always define types where XXX is opaque
//
// typedef typename XXX Simd;
// typedef typename XXX GaugeLinkField;
// typedef typename XXX GaugeField;
// typedef typename XXX GaugeActField;
// typedef typename XXX FermionField;
// typedef typename XXX DoubledGaugeField;
// typedef typename XXX SiteSpinor;
// typedef typename XXX SiteHalfSpinor;
// typedef typename XXX Compressor;
//
// and Methods:
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
//
//
// To acquire the typedefs from "Base" (either a base class or template param) use:
//
// INHERIT_GIMPL_TYPES(Base)
// INHERIT_FIMPL_TYPES(Base)
// INHERIT_IMPL_TYPES(Base)
//
// The Fermion operators will do the following:
//
// struct MyOpParams {
// RealD mass;
// };
//
//
// template<class Impl>
// class MyOp : public Impl {
// public:
//
// INHERIT_ALL_IMPL_TYPES(Impl);
//
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
// {
//
// };
//
// }
//////////////////////////////////////////////
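A slightly fleshed-out version of the MyOp sketch above (MyOp and MyOpParams remain illustrative names; only the macro, FermionOperator and ImplParams come from this diff, and the pure virtuals of FermionOperator would still need overriding):
struct MyOpParams {
  RealD mass;
};
template<class Impl>
class MyOp : public FermionOperator<Impl> {
public:
  INHERIT_IMPL_TYPES(Impl);   // pulls in FermionField, GaugeField, Simd, ...
  MyOp(MyOpParams myparm, const ImplParams &implparm)
    : FermionOperator<Impl>(implparm), mass(myparm.mass) {};
  // ... override M, Mdag, Meooe, Mooee, the Dhop and Deriv families here ...
private:
  RealD mass;
};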
////////////////////////////////////////////////////////////////////////
// Implementation dependent gauge types
////////////////////////////////////////////////////////////////////////
#define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base)\
INHERIT_FIMPL_TYPES(Base)
#define INHERIT_GIMPL_TYPES(GImpl) \
typedef typename GImpl::Simd Simd;\
typedef typename GImpl::GaugeLinkField GaugeLinkField;\
typedef typename GImpl::GaugeField GaugeField;
// Composition with smeared links, bc's etc.; probably needs multiple inheritance
// Variable precision "S" and variable Nc
template<class S,int Nrepresentation=Nc>
class ImplGauge {
public:
typedef S Simd;
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
typedef iImplGaugeLink <Simd> SiteGaugeLink;
typedef iImplGaugeField <Simd> SiteGaugeField;
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
typedef Lattice<SiteGaugeField> GaugeField;
};
////////////////////////////////////////////////////////////////////////
// Implementation dependent fermion types
////////////////////////////////////////////////////////////////////////
#define INHERIT_FIMPL_TYPES(Impl)\
typedef typename Impl::FermionField FermionField; \
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
typedef typename Impl::SiteSpinor SiteSpinor; \
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams;
///////
// Single flavour four spinors with colour index
///////
template<class S,int Nrepresentation=Nc>
class WilsonImpl : public ImplGauge<S,Nrepresentation> {
public:
typedef ImplGauge<S,Nrepresentation> Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
typedef iImplSpinor <Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl;
ImplParams Params;
WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
mult(&phi(),&U(mu),&chi());
}
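Spelled out (hedged from the tensor structure above), multLink contracts only the colour index for each half-spinor component:
  phi^a_alpha(x) = U_mu(x)^{ab} chi^b_alpha(x),   a,b = colour,  alpha = 1..Nhs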
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds._grid,GaugeGrid);
conformable(Umu._grid,GaugeGrid);
GaugeLinkField U(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
U = PeekIndex<LorentzIndex>(Umu,mu);
PokeIndex<LorentzIndex>(Uds,U,mu);
U = adj(Cshift(U,mu,-1));
PokeIndex<LorentzIndex>(Uds,U,mu+4);
}
}
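In effect (a hedged summary of the loop above), the doubled field packs forward links unchanged and backward links as the shifted adjoint,
  Uds_mu(x)     = U_mu(x),                 mu = 0 .. Nd-1
  Uds_{mu+4}(x) = U_mu(x - muhat)^dagger,
so the stencil can apply both the forward and backward hop without re-communicating the gauge field.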
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat._grid);
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
int Ls=Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid);
tmp = zero;
PARALLEL_FOR_LOOP
for(int sss=0;sss<tmp._grid->oSites();sss++){
int sU=sss;
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
}
PokeIndex<LorentzIndex>(mat,tmp,mu);
}
};
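Both insertion routines above implement the same schematic force term (colour outer product as implemented by outerProduct; conventions hedged from the code), with the 5D variant summing the extra dimension at fixed 4D site:
  mat_mu(x) = tr_spin outerProduct(Btilde(x), A(x))                  // 4D
  mat_mu(x) = tr_spin sum_s outerProduct(Btilde(x,s), Atilde(x,s))   // 5D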
////////////////////////////////////////////////////////////////////////////////////////
// Flavour doubled spinors; is Gparity the only case? What about C*?
////////////////////////////////////////////////////////////////////////////////////////
template<class S,int Nrepresentation>
class GparityWilsonImpl : public ImplGauge<S,Nrepresentation> {
public:
typedef ImplGauge<S,Nrepresentation> Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
template<typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >;
template<typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >;
template<typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >;
typedef iImplSpinor <Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl;
typedef GparityWilsonImplParams ImplParams;
ImplParams Params;
GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
// provide the multiply-by-link, which differs between Gparity (with flavour index) and non-Gparity
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
vobj vtmp;
sobj stmp;
GridBase *grid = St._grid;
const int Nsimd = grid->Nsimd();
int direction = St._directions[mu];
int distance = St._distances[mu];
int ptype = St._permute_type[mu];
int sl = St._grid->_simd_layout[direction];
// Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd;
// assert our assumptions
assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code
assert((sl==1)||(sl==2));
std::vector<int> icoor;
if ( SE->_around_the_world && Params.twists[mmu] ) {
if ( sl == 2 ) {
std::vector<sobj> vals(Nsimd);
extract(chi,vals);
for(int s=0;s<Nsimd;s++){
grid->iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane;
if ( distance == 1) {
permute_lane = icoor[direction]?1:0;
} else {
permute_lane = icoor[direction]?0:1;
}
if ( permute_lane ) {
stmp(0) = vals[s](1);
stmp(1) = vals[s](0);
vals[s] = stmp;
}
}
merge(vtmp,vals);
} else {
vtmp(0) = chi(1);
vtmp(1) = chi(0);
}
mult(&phi(0),&U(0)(mu),&vtmp(0));
mult(&phi(1),&U(1)(mu),&vtmp(1));
} else {
mult(&phi(0),&U(0)(mu),&chi(0));
mult(&phi(1),&U(1)(mu),&chi(1));
}
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds._grid,GaugeGrid);
conformable(Umu._grid,GaugeGrid);
GaugeLinkField Utmp(GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U);
// This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) {
Uconj = where(coor==neglink,-Uconj,Uconj);
}
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu) = U[ss]();
Uds[ss](1)(mu) = Uconj[ss]();
}
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1));
Utmp = U;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,Uconj,Utmp);
}
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu+4) = Utmp[ss]();
}
Utmp = Uconj;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp);
}
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss]();
}
}
}
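Hedged summary of the twisted double store above: flavour 0 carries U_mu, flavour 1 carries the conjugate links U_mu^*, with an extra minus sign on the last slice of each twisted direction,
  Uds(0)(mu)(x) = U_mu(x)
  Uds(1)(mu)(x) = +/- U_mu(x)^*    // minus only on the boundary slice when twists[mu] is set
and the mu+4 slots hold the correspondingly patched backward links; the flavour exchange itself happens in multLink above.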
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
assert(0);
// Fixme
return;
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
assert(0);
// Fixme
return;
}
};
typedef WilsonImpl<vComplex ,Nc> WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double
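A hedged usage sketch tying these typedefs to the operators in this diff (the grid objects, gauge field and numeric values are assumed to exist; only the types and the constructor signature are taken from the code above):
typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;  // illustrative alias
RealD mass = 0.01, M5 = 1.8;                                // assumed values
DomainWallFermionR Ddwf(Umu, FGrid, FrbGrid, UGrid, UrbGrid, mass, M5);
Ddwf.M(src, result);                                        // apply the 5D operator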
}
}
#endif

View File

@ -7,35 +7,38 @@ namespace Grid {
namespace QCD {
class MobiusFermion : public CayleyFermion5D
template<class Impl>
class MobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
MobiusFermion(LatticeGaugeField &_Umu,
MobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c) :
RealD b, RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Tanh approx"<<std::endl;
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
// Call base setter
this->CayleyFermion5D::SetCoefficientsTanh(zdata,b,c);
this->SetCoefficientsTanh(zdata,b,c);
Approx::zolotarev_free(zdata);

View File

@ -7,26 +7,29 @@ namespace Grid {
namespace QCD {
class MobiusZolotarevFermion : public CayleyFermion5D
template<class Impl>
class MobiusZolotarevFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
MobiusZolotarevFermion(LatticeGaugeField &_Umu,
MobiusZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD b, RealD c,
RealD lo, RealD hi) :
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
CayleyFermion5D(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5)
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = lo/hi;
@ -34,10 +37,10 @@ namespace Grid {
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
assert(zdata->n==this->Ls);
std::cout << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
// Call base setter
this->CayleyFermion5D::SetCoefficientsZolotarev(hi,zdata,b,c);
this->SetCoefficientsZolotarev(hi,zdata,b,c);
Approx::zolotarev_free(zdata);
}

View File

@ -7,25 +7,28 @@ namespace Grid {
namespace QCD {
class OverlapWilsonCayleyTanhFermion : public MobiusFermion
template<class Impl>
class OverlapWilsonCayleyTanhFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Constructors
OverlapWilsonCayleyTanhFermion(LatticeGaugeField &_Umu,
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale) :
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
MobiusFermion(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale)
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p)
{
}
};

View File

@ -7,25 +7,28 @@ namespace Grid {
namespace QCD {
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion
template<class Impl>
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
// Constructors
OverlapWilsonCayleyZolotarevFermion(LatticeGaugeField &_Umu,
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi) :
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c=1.0, b-c = 0 <=> b =c = 1/2
MobiusZolotarevFermion(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi)
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p)
{}

View File

@ -7,31 +7,34 @@ namespace Grid {
namespace QCD {
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D
template<class Impl>
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracTanhFermion(LatticeGaugeField &_Umu,
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale) :
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5)
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((Ls&0x1)==1); // Odd Ls required
int nrational=Ls-1;// Even rational order
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
SetCoefficientsTanh(zdata,scale);
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
};

View File

@ -7,34 +7,36 @@ namespace Grid {
namespace QCD {
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D
template<class Impl>
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonContFracZolotarevFermion(LatticeGaugeField &_Umu,
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi):
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
// b+c=scale, b-c = 0 <=> b =c = scale/2
ContinuedFractionFermion5D(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5)
ContinuedFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((Ls&0x1)==1); // Odd Ls required
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=Ls;// Odd rational order
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
SetCoefficientsZolotarev(hi,zdata);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
}

View File

@ -7,31 +7,34 @@ namespace Grid {
namespace QCD {
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D
template<class Impl>
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionTanhFermion(LatticeGaugeField &_Umu,
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD scale) :
RealD scale,const ImplParams &p= ImplParams()) :
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5)
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((Ls&0x1)==1); // Odd Ls required
int nrational=Ls-1;// Even rational order
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=this->Ls-1;// Even rational order
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
SetCoefficientsTanh(zdata,scale);
this->SetCoefficientsTanh(zdata,scale);
Approx::zolotarev_free(zdata);
}
};

View File

@ -7,34 +7,36 @@ namespace Grid {
namespace QCD {
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D
template<class Impl>
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
// Constructors
OverlapWilsonPartialFractionZolotarevFermion(LatticeGaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi):
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
// b+c=scale, b-c = 0 <=> b =c = scale/2
PartialFractionFermion5D(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5)
PartialFractionFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
assert((Ls&0x1)==1); // Odd Ls required
assert((this->Ls&0x1)==1); // Odd Ls required
int nrational=Ls;// Odd rational order
int nrational=this->Ls;// Odd rational order
RealD eps = lo/hi;
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
SetCoefficientsZolotarev(hi,zdata);
this->SetCoefficientsZolotarev(hi,zdata);
Approx::zolotarev_free(zdata);
}
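For reference (standard Zolotarev usage; the kernel details are outside this diff): lo and hi are meant to bracket the spectrum of the underlying kernel, and eps = lo/hi fixes the relative approximation window, so that over it
  sign(x) ~ x * R(x^2),   lo <= |x| <= hi,
with the error controlled by eps and the rational order nrational = Ls.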

View File

@ -2,12 +2,15 @@
namespace Grid {
namespace QCD {
void PartialFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
template<class Impl>
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
// this does both dag and undag but is trivial; make a common helper routine
int sign = 1;
int Ls = this->Ls;
DhopDir(psi,chi,dir,disp);
this->DhopDir(psi,chi,dir,disp);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
@ -18,15 +21,16 @@ namespace Grid {
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
void PartialFractionFermion5D::Meooe_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
{
// this does both dag and undag but is trivial; make a common helper routine
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
if ( psi.checkerboard == Odd ) {
DhopEO(psi,chi,DaggerNo);
this->DhopEO(psi,chi,DaggerNo);
} else {
DhopOE(psi,chi,DaggerNo);
this->DhopOE(psi,chi,DaggerNo);
}
int nblock=(Ls-1)/2;
@ -38,10 +42,12 @@ namespace Grid {
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
void PartialFractionFermion5D::Mooee_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee_internal(const FermionField &psi, FermionField &chi,int dag)
{
// again dag and undag are trivially related
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
@ -69,11 +75,13 @@ namespace Grid {
}
}
void PartialFractionFermion5D::MooeeInv_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag)
{
int sign = dag ? (-1) : 1;
int Ls = this->Ls;
LatticeFermion tmp(psi._grid);
FermionField tmp(psi._grid);
///////////////////////////////////////////////////////////////////////////////////////
//Linv
@ -129,10 +137,12 @@ namespace Grid {
axpby_ssp (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
}
void PartialFractionFermion5D::M_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
template<class Impl>
void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, FermionField &chi,int dag)
{
LatticeFermion D(psi._grid);
FermionField D(psi._grid);
int Ls = this->Ls;
int sign = dag ? (-1) : 1;
// For partial frac Hw case (b5=c5=1) chroma quirkily computes
@ -186,7 +196,7 @@ namespace Grid {
// ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H
//
DW(psi,D,DaggerNo);
this->DW(psi,D,DaggerNo);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
@ -217,61 +227,127 @@ namespace Grid {
}
RealD PartialFractionFermion5D::M (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerNo);
return norm2(out);
}
RealD PartialFractionFermion5D::Mdag (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
{
M_internal(in,out,DaggerYes);
return norm2(out);
}
void PartialFractionFermion5D::Meooe (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerNo);
}
void PartialFractionFermion5D::MeooeDag (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
void PartialFractionFermion5D<Impl>::MeooeDag (const FermionField &in, FermionField &out)
{
Meooe_internal(in,out,DaggerYes);
}
void PartialFractionFermion5D::Mooee (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
void PartialFractionFermion5D<Impl>::Mooee (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerNo);
}
void PartialFractionFermion5D::MooeeDag (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeDag (const FermionField &in, FermionField &out)
{
Mooee_internal(in,out,DaggerYes);
}
void PartialFractionFermion5D::MooeeInv (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInv (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerNo);
}
void PartialFractionFermion5D::MooeeInvDag (const LatticeFermion &in, LatticeFermion &out)
template<class Impl>
void PartialFractionFermion5D<Impl>::MooeeInvDag (const FermionField &in, FermionField &out)
{
MooeeInv_internal(in,out,DaggerYes);
}
void PartialFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void PartialFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDeriv(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivOE(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
int Ls = this->Ls;
FermionField D(V._grid);
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
}
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
this->DhopDerivEO(mat,D,V,DaggerNo);
};
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
SetCoefficientsZolotarev(1.0/scale,zdata);
}
void PartialFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
template<class Impl>
void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
// check on degree matching
// std::cout << Ls << " Ls"<<std::endl;
// std::cout << zdata->n << " - n"<<std::endl;
// std::cout << zdata->da << " -da "<<std::endl;
// std::cout << zdata->db << " -db"<<std::endl;
// std::cout << zdata->dn << " -dn"<<std::endl;
// std::cout << zdata->dd << " -dd"<<std::endl;
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
int Ls = this->Ls;
assert(Ls == (2*zdata->da -1) );
// Part frac
// RealD R;
R=(1+mass)/(1-mass);
dw_diag = (4.0-M5);
dw_diag = (4.0-this->M5);
// std::vector<RealD> p;
// std::vector<RealD> q;
@ -291,18 +367,22 @@ namespace Grid {
}
// Constructors
PartialFractionFermion5D::PartialFractionFermion5D(LatticeGaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5) :
WilsonFermion5D(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5),
template<class Impl>
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,
const ImplParams &p) :
WilsonFermion5D<Impl>(_Umu,
FiveDimGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimRedBlackGrid,M5,p),
mass(_mass)
{
int Ls = this->Ls;
assert((Ls&0x1)==1); // Odd Ls required
int nrational=Ls-1;
@ -321,6 +401,8 @@ namespace Grid {
}
FermOpTemplateInstantiate(PartialFractionFermion5D);
}
}

View File

@ -5,41 +5,48 @@ namespace Grid {
namespace QCD {
class PartialFractionFermion5D : public WilsonFermion5D
template<class Impl>
class PartialFractionFermion5D : public WilsonFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
const int part_frac_chroma_convention=1;
void Meooe_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
void Mooee_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
void MooeeInv_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
void M_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
void MooeeInv_internal(const FermionField &in, FermionField &out,int dag);
void M_internal(const FermionField &in, FermionField &out,int dag);
// override multiply
virtual RealD M (const LatticeFermion &in, LatticeFermion &out);
virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out);
virtual RealD M (const FermionField &in, FermionField &out);
virtual RealD Mdag (const FermionField &in, FermionField &out);
// half checkerboard operations
virtual void Meooe (const LatticeFermion &in, LatticeFermion &out);
virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out);
virtual void Mooee (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out);
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
virtual void Meooe (const FermionField &in, FermionField &out);
virtual void MeooeDag (const FermionField &in, FermionField &out);
virtual void Mooee (const FermionField &in, FermionField &out);
virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
// force terms; five routines; default to Dhop on diagonal
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void Instantiatable(void) =0; // ensure no make-eee
// Efficient support for multigrid coarsening
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
// Constructors
PartialFractionFermion5D(LatticeGaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5);
PartialFractionFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
protected:

View File

@ -7,12 +7,14 @@ namespace Grid {
namespace QCD {
class ScaledShamirFermion : public MobiusFermion
template<class Impl>
class ScaledShamirFermion : public MobiusFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
// Constructors
ScaledShamirFermion(LatticeGaugeField &_Umu,
ScaledShamirFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
@ -21,7 +23,7 @@ namespace Grid {
RealD scale) :
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
MobiusFermion(_Umu,
MobiusFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,

View File

@ -7,27 +7,29 @@ namespace Grid {
namespace QCD {
class ShamirZolotarevFermion : public MobiusZolotarevFermion
template<class Impl>
class ShamirZolotarevFermion : public MobiusZolotarevFermion<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
// Constructors
ShamirZolotarevFermion(LatticeGaugeField &_Umu,
ShamirZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
RealD lo, RealD hi) :
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
// b+c = 1; b-c = 1 => b=1, c=0
MobiusZolotarevFermion(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi)
MobiusZolotarevFermion<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p)
{}

Some files were not shown because too many files have changed in this diff.