mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Merge branch 'master' of https://github.com/paboyle/Grid
This commit is contained in:
commit
fb81acca3c
64
.gitignore
vendored
64
.gitignore
vendored
@ -1,55 +1,66 @@
|
|||||||
# Compiled Object files
|
# Compiled Object files #
|
||||||
|
#########################
|
||||||
*.slo
|
*.slo
|
||||||
*.lo
|
*.lo
|
||||||
*.o
|
*.o
|
||||||
*.obj
|
*.obj
|
||||||
|
|
||||||
|
|
||||||
|
# Editor files #
|
||||||
|
################
|
||||||
*~
|
*~
|
||||||
errs
|
|
||||||
*#
|
*#
|
||||||
|
|
||||||
# Precompiled Headers
|
# Precompiled Headers #
|
||||||
|
#######################
|
||||||
*.gch
|
*.gch
|
||||||
*.pch
|
*.pch
|
||||||
|
|
||||||
# Compiled Dynamic libraries
|
# Compiled Dynamic libraries #
|
||||||
|
##############################
|
||||||
*.so
|
*.so
|
||||||
*.dylib
|
*.dylib
|
||||||
*.dll
|
*.dll
|
||||||
|
|
||||||
# Fortran module files
|
# Fortran module files #
|
||||||
|
########################
|
||||||
*.mod
|
*.mod
|
||||||
|
|
||||||
# Compiled Static libraries
|
# Compiled Static libraries #
|
||||||
|
#############################
|
||||||
*.lai
|
*.lai
|
||||||
*.la
|
*.la
|
||||||
*.a
|
*.a
|
||||||
*.lib
|
*.lib
|
||||||
|
|
||||||
# Executables
|
# Executables #
|
||||||
|
###############
|
||||||
*.exe
|
*.exe
|
||||||
*.out
|
*.out
|
||||||
*.app
|
*.app
|
||||||
# http://www.gnu.org/software/automake
|
|
||||||
|
|
||||||
|
# http://www.gnu.org/software/automake #
|
||||||
|
########################################
|
||||||
Makefile.in
|
Makefile.in
|
||||||
Makefile
|
Makefile
|
||||||
|
Config.h
|
||||||
config.log
|
config.log
|
||||||
config.status
|
config.status
|
||||||
.deps
|
.deps
|
||||||
|
|
||||||
# http://www.gnu.org/software/autoconf
|
# http://www.gnu.org/software/autoconf #
|
||||||
|
########################################
|
||||||
/autom4te.cache
|
autom4te.cache
|
||||||
/aclocal.m4
|
aclocal.m4
|
||||||
/compile
|
compile
|
||||||
/configure
|
configure
|
||||||
/depcomp
|
depcomp
|
||||||
/install-sh
|
install-sh
|
||||||
/missing
|
missing
|
||||||
/stamp-h1
|
stamp-h1
|
||||||
/config.sub
|
config.sub
|
||||||
/config.guess
|
config.guess
|
||||||
/INSTALL
|
INSTALL
|
||||||
|
|
||||||
# Packages #
|
# Packages #
|
||||||
############
|
############
|
||||||
@ -78,4 +89,13 @@ config.status
|
|||||||
.Spotlight-V100
|
.Spotlight-V100
|
||||||
.Trashes
|
.Trashes
|
||||||
ehthumbs.db
|
ehthumbs.db
|
||||||
Thumbs.db
|
Thumbs.db
|
||||||
|
|
||||||
|
# build directory #
|
||||||
|
###################
|
||||||
|
build/*
|
||||||
|
|
||||||
|
# IDE related files #
|
||||||
|
#####################
|
||||||
|
*.xcodeproj/*
|
||||||
|
build.sh
|
||||||
|
125
TODO
125
TODO
@ -1,15 +1,61 @@
|
|||||||
|
RECENT
|
||||||
|
---------------
|
||||||
|
|
||||||
|
- Clean up HMC -- DONE
|
||||||
|
- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
|
||||||
|
- Simplified the integrators a bit. -- DONE
|
||||||
|
- Multi-timescale looks broken and operating on single timescale for now. -- DONE
|
||||||
|
- pass GaugeField as template param. -- DONE
|
||||||
|
- Reunitarise -- DONE
|
||||||
|
- Force Gradient -- DONE
|
||||||
|
- Prefer "RefreshInternal" or such like to "init" in naming -- DONE
|
||||||
|
- Parallel io improvements -- DONE
|
||||||
|
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
---------------
|
||||||
|
Policies:
|
||||||
|
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
|
||||||
|
* Support different boundary conditions (finite temp, chem. potential ... )
|
||||||
|
* Support different fermion representations?
|
||||||
|
- contained entirely within the integrator presently
|
||||||
|
|
||||||
|
- Sign of force term.
|
||||||
|
|
||||||
|
- Reversibility test.
|
||||||
|
|
||||||
|
- Rename "Ta" as too unclear
|
||||||
|
|
||||||
|
- Lanczos
|
||||||
|
|
||||||
|
- Rectangle gauge actions.
|
||||||
|
Iwasaki,
|
||||||
|
Symanzik,
|
||||||
|
... etc...
|
||||||
|
|
||||||
|
- Prepare multigrid for HMC. - Alternate setup schemes.
|
||||||
|
|
||||||
|
- Support for ILDG --- ugly, not done
|
||||||
|
|
||||||
|
- Flavour matrices?
|
||||||
|
|
||||||
|
- FFTnD ?
|
||||||
|
|
||||||
================================================================
|
================================================================
|
||||||
*** Hacks and bug fixes to clean up and Audits
|
* Hacks and bug fixes to clean up and Audits
|
||||||
================================================================
|
================================================================
|
||||||
|
|
||||||
* Extract/merge/set cleanup ; too many variants; rationalise and call simpler ones
|
* Extract/merge/set cleanup ; too many variants; rationalise and call simpler ones
|
||||||
* Used #define repetitive sequences to minimise code.
|
|
||||||
* Rewrite core tensor arithmetic support to be more systematic
|
* Rewrite core tensor arithmetic support to be more systematic
|
||||||
|
= Use #define repetitive sequences to minimise code, decrease line count by thousands possible,
|
||||||
|
with more robust and maintainable implementation.
|
||||||
|
|
||||||
* Ensure we ET as much as possible; move unop functions into ET framework.
|
* Ensure we ET as much as possible; move unop functions into ET framework.
|
||||||
- tests with expression args to all functions
|
- tests with expression args to all functions
|
||||||
|
|
||||||
|
|
||||||
* FIXME audit
|
* FIXME audit
|
||||||
|
|
||||||
* const audit
|
* const audit
|
||||||
|
|
||||||
Insert/Extract
|
Insert/Extract
|
||||||
@ -22,10 +68,12 @@ Insert/Extract
|
|||||||
|
|
||||||
* Thread scaling tests Xeon, XeonPhi
|
* Thread scaling tests Xeon, XeonPhi
|
||||||
|
|
||||||
** Make the Tensor types and Complex etc... play more nicely.
|
Not sure of status of this -- reverify. Things are working nicely now though.
|
||||||
|
|
||||||
|
* Make the Tensor types and Complex etc... play more nicely.
|
||||||
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
||||||
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
|
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
|
||||||
want to introduce a syntax that does not require this.
|
want to introduce a syntax that does not require this.
|
||||||
|
|
||||||
- Reductions that contract indices on a site should always demote the tensor structure.
|
- Reductions that contract indices on a site should always demote the tensor structure.
|
||||||
norm2(), innerProduct.
|
norm2(), innerProduct.
|
||||||
@ -38,6 +86,37 @@ Insert/Extract
|
|||||||
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
|
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
|
||||||
pure scalar.
|
pure scalar.
|
||||||
|
|
||||||
|
======================================================================
|
||||||
|
======================================================================
|
||||||
|
======================================================================
|
||||||
|
======================================================================
|
||||||
|
Done: Cayley, Partial , ContFrac force terms.
|
||||||
|
|
||||||
|
DONE
|
||||||
|
- PseudoFermions
|
||||||
|
=> generalise to non-const EE ; likely defer (??) (NOT DONE)
|
||||||
|
Done:
|
||||||
|
- TwoFlavour
|
||||||
|
- TwoFlavourEvenOdd
|
||||||
|
- TwoFlavourRatio
|
||||||
|
- TwoFlavourRatioEvenOdd
|
||||||
|
|
||||||
|
Done:
|
||||||
|
- OneFlavourRationalEvenOdd
|
||||||
|
- OneFlavourRationalRatioEvenOdd
|
||||||
|
- OneFlavourRationalRatio
|
||||||
|
|
||||||
|
Done
|
||||||
|
=> Test DWF HMC
|
||||||
|
- Fix a threading bug that has been introduced and prevents HMC running hybrid OMP mode
|
||||||
|
|
||||||
|
Done:
|
||||||
|
- RNG filling from sparser grid, lower dim grid.
|
||||||
|
|
||||||
|
|
||||||
|
DONE
|
||||||
|
- MacroMagic -> virtual reader class.
|
||||||
|
|
||||||
*** Expression template engine: -- DONE
|
*** Expression template engine: -- DONE
|
||||||
|
|
||||||
[ -- Norm2(expression) problem: introduce norm2 unary op, or Introduce conversion automatic from expression to Lattice<vobj>
|
[ -- Norm2(expression) problem: introduce norm2 unary op, or Introduce conversion automatic from expression to Lattice<vobj>
|
||||||
@ -54,28 +133,13 @@ Insert/Extract
|
|||||||
// localMaxAbs
|
// localMaxAbs
|
||||||
// Fourier transform equivalent.]
|
// Fourier transform equivalent.]
|
||||||
|
|
||||||
================================================================
|
|
||||||
*** New Functionality
|
|
||||||
================================================================
|
|
||||||
|
|
||||||
* - BinaryWriter, TextWriter etc...
|
|
||||||
- use protocol buffers? replace xmlReader/Writer ec..
|
|
||||||
- Binary use htonll, htonl
|
|
||||||
|
|
||||||
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
||||||
|
|
||||||
* Parallel io improvements
|
-- coherent framework for implementing actions and their forces.
|
||||||
- optional parallel MPI2 IO
|
Actions
|
||||||
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
|
|
||||||
|
|
||||||
* Support for ILDG
|
|
||||||
|
|
||||||
* Support different boundary conditions (finite temp, chem. potential ... )
|
|
||||||
|
|
||||||
* Support different fermion representations?
|
|
||||||
|
|
||||||
Actions -- coherent framework for implementing actions and their forces.
|
|
||||||
|
|
||||||
|
DONE
|
||||||
* Fermion
|
* Fermion
|
||||||
- Wilson
|
- Wilson
|
||||||
- Clover
|
- Clover
|
||||||
@ -83,6 +147,7 @@ Actions -- coherent framework for implementing actions and their forces.
|
|||||||
- Mobius
|
- Mobius
|
||||||
- z-Mobius
|
- z-Mobius
|
||||||
|
|
||||||
|
|
||||||
Algorithms (lots of reuse/port from BFM)
|
Algorithms (lots of reuse/port from BFM)
|
||||||
* LinearOperator
|
* LinearOperator
|
||||||
* LinearSolver
|
* LinearSolver
|
||||||
@ -100,17 +165,10 @@ Algorithms (lots of reuse/port from BFM)
|
|||||||
* Integrators, leapfrog, omelyan, force gradient etc...
|
* Integrators, leapfrog, omelyan, force gradient etc...
|
||||||
* etc..
|
* etc..
|
||||||
|
|
||||||
* Gauge
|
Done
|
||||||
- Wilson, symanzik, iwasaki
|
|
||||||
|
|
||||||
* rb4d support for 5th dimension in Mobius.
|
|
||||||
|
|
||||||
* Flavour matrices?
|
|
||||||
* Pauli, SU subgroup, etc..
|
* Pauli, SU subgroup, etc..
|
||||||
* su3 exponentiation & log etc.. [Jamie's code?]
|
|
||||||
* TaProj
|
|
||||||
* FFTnD ?
|
|
||||||
|
|
||||||
|
* su3 exponentiation & log etc.. [Jamie's code?]
|
||||||
|
|
||||||
======================================================================================================
|
======================================================================================================
|
||||||
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
|
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
|
||||||
@ -144,7 +202,6 @@ FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me argua
|
|||||||
- lib/communicator
|
- lib/communicator
|
||||||
- lib/algorithms
|
- lib/algorithms
|
||||||
- lib/qcd
|
- lib/qcd
|
||||||
future
|
|
||||||
- lib/io/ -- GridLog, GridIn, GridErr, GridDebug, GridMessage
|
- lib/io/ -- GridLog, GridIn, GridErr, GridDebug, GridMessage
|
||||||
- lib/qcd/actions
|
- lib/qcd/actions
|
||||||
- lib/qcd/measurements
|
- lib/qcd/measurements
|
||||||
|
@ -11,22 +11,25 @@ int main (int argc, char ** argv)
|
|||||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
int Nloop=10;
|
int Nloop=10;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
for(int mu=0;mu<4;mu++) if (mpi_layout[mu]>1) nmu++;
|
for(int mu=0;mu<4;mu++) if (mpi_layout[mu]>1) nmu++;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=2){
|
for(int lat=4;lat<=32;lat+=2){
|
||||||
for(int Ls=1;Ls<=16;Ls*=2){
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
@ -84,15 +87,15 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
double time = stop-start; // microseconds
|
double time = stop-start; // microseconds
|
||||||
|
|
||||||
std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=2){
|
for(int lat=4;lat<=32;lat+=2){
|
||||||
@ -160,7 +163,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
double time = stop-start;
|
double time = stop-start;
|
||||||
|
|
||||||
std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,11 +21,11 @@ int main (int argc, char ** argv)
|
|||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
std::vector<int> latt4 = GridDefaultLatt();
|
std::vector<int> latt4 = GridDefaultLatt();
|
||||||
const int Ls=8;
|
const int Ls=16;
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
@ -59,7 +59,7 @@ int main (int argc, char ** argv)
|
|||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
std::vector<LatticeColourMatrix> U(4,FGrid);
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
U[mu] = peekIndex<LorentzIndex>(Umu5d,mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
@ -79,25 +79,28 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
DomainWallFermion Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
std::cout << "Calling Dw"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
||||||
int ncall=10;
|
int ncall=10000;
|
||||||
double t0=usecond();
|
{
|
||||||
for(int i=0;i<ncall;i++){
|
double t0=usecond();
|
||||||
Dw.Dhop(src,result,0);
|
for(int i=0;i<ncall;i++){
|
||||||
}
|
Dw.Dhop(src,result,0);
|
||||||
double t1=usecond();
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||||
double flops=1344*volume*ncall;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
std::cout << "Called Dw"<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout << "norm result "<< norm2(result)<<std::endl;
|
err = ref-result;
|
||||||
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
Dw.Report();
|
||||||
err = ref-result;
|
}
|
||||||
std::cout << "norm diff "<< norm2(err)<<std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
@ -120,11 +123,11 @@ int main (int argc, char ** argv)
|
|||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
}
|
}
|
||||||
Dw.Dhop(src,result,1);
|
Dw.Dhop(src,result,1);
|
||||||
std::cout << "Called DwDag"<<std::endl;
|
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||||
std::cout << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
|
||||||
LatticeFermion src_e (FrbGrid);
|
LatticeFermion src_e (FrbGrid);
|
||||||
LatticeFermion src_o (FrbGrid);
|
LatticeFermion src_o (FrbGrid);
|
||||||
@ -133,24 +136,44 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion r_eo (FGrid);
|
LatticeFermion r_eo (FGrid);
|
||||||
|
|
||||||
|
|
||||||
std::cout << "Calling Deo and Doe"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Deo and Doe"<<std::endl;
|
||||||
pickCheckerboard(Even,src_e,src);
|
pickCheckerboard(Even,src_e,src);
|
||||||
pickCheckerboard(Odd,src_o,src);
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
||||||
|
|
||||||
|
{
|
||||||
|
double t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||||
Dw.Dhop(src,result,DaggerNo);
|
Dw.Dhop (src ,result,DaggerNo);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "r_e"<<norm2(r_e)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "r_o"<<norm2(r_o)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "res"<<norm2(result)<<std::endl;
|
||||||
|
|
||||||
setCheckerboard(r_eo,r_o);
|
setCheckerboard(r_eo,r_o);
|
||||||
setCheckerboard(r_eo,r_e);
|
setCheckerboard(r_eo,r_e);
|
||||||
|
|
||||||
err = r_eo-result;
|
err = r_eo-result;
|
||||||
std::cout << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
|
||||||
pickCheckerboard(Even,src_e,err);
|
pickCheckerboard(Even,src_e,err);
|
||||||
pickCheckerboard(Odd,src_o,err);
|
pickCheckerboard(Odd,src_o,err);
|
||||||
std::cout << "norm diff even "<< norm2(src_e)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||||
std::cout << "norm diff odd "<< norm2(src_o)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
84
benchmarks/Benchmark_memory_asynch.cc
Normal file
84
benchmarks/Benchmark_memory_asynch.cc
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Nvec=8;
|
||||||
|
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||||
|
typedef iVector<vReal,Nvec> Vec;
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"bytes/thread"<<"\t\t\t"<<"GB/s"<<"\t\t\t"<<"GB/s per thread"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
|
const int lmax = 16536*16;
|
||||||
|
for(int lat=4;lat<=lmax;lat*=2){
|
||||||
|
|
||||||
|
int Nloop=lmax*4/lat;
|
||||||
|
|
||||||
|
std::vector<int> latt_size ({2*mpi_layout[0],2*mpi_layout[1],4*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]*threads;
|
||||||
|
|
||||||
|
Vec tsum; tsum = zero;
|
||||||
|
|
||||||
|
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||||
|
|
||||||
|
std::vector<double> stop(threads);
|
||||||
|
Vector<Vec> sum(threads);
|
||||||
|
|
||||||
|
std::vector<LatticeVec> x(threads,&Grid);
|
||||||
|
for(int t=0;t<threads;t++){
|
||||||
|
// random(pRNG,x[t]);
|
||||||
|
}
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int t=0;t<threads;t++){
|
||||||
|
|
||||||
|
sum[t] = x[t]._odata[0];
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
for(auto ss=x[t].begin();ss<x[t].end();ss++){
|
||||||
|
sum[t] = sum[t]+x[t]._odata[ss];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stop[t]=usecond();
|
||||||
|
}
|
||||||
|
|
||||||
|
double max_stop=stop[0];
|
||||||
|
double min_stop=stop[0];
|
||||||
|
|
||||||
|
for(int t=0;t<threads;t++){
|
||||||
|
tsum+=sum[t];
|
||||||
|
if ( stop[t]<min_stop ) min_stop=stop[t];
|
||||||
|
if ( stop[t]>max_stop ) max_stop=stop[t];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double max_time = (max_stop-start)/Nloop*1000;
|
||||||
|
double min_time = (min_stop-start)/Nloop*1000;
|
||||||
|
|
||||||
|
double bytes=vol*Nvec*sizeof(Real);
|
||||||
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/threads<<"\t\t"<<bytes/max_time<<" - "<< bytes/min_time<<"\t\t"<<bytes/min_time/threads <<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -10,28 +10,34 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
const int Nvec=8;
|
const int Nvec=8;
|
||||||
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||||
|
typedef iVector<vReal,Nvec> Vec;
|
||||||
|
|
||||||
int Nloop=1000;
|
|
||||||
|
Vec rn = zero;
|
||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
|
||||||
std::cout << "= Benchmarking fused AXPY bandwidth"<<std::endl;
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=4){
|
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking fused AXPY bandwidth ; sizeof(Real) "<<sizeof(Real)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
uint64_t lmax=44;
|
||||||
|
#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
|
||||||
|
for(int lat=4;lat<=lmax;lat+=4){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
uint64_t Nloop=NLOOP;
|
||||||
|
|
||||||
|
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||||
|
|
||||||
LatticeVec z(&Grid); //random(pRNG,z);
|
LatticeVec z(&Grid); //random(pRNG,z);
|
||||||
LatticeVec x(&Grid); //random(pRNG,x);
|
LatticeVec x(&Grid); //random(pRNG,x);
|
||||||
@ -41,63 +47,70 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int i=0;i<Nloop;i++){
|
for(int i=0;i<Nloop;i++){
|
||||||
// inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
|
||||||
axpy(z,a,x,y);
|
axpy(z,a,x,y);
|
||||||
|
x._odata[0]=z._odata[0]; // serial loop dependence to prevent optimise
|
||||||
|
y._odata[4]=z._odata[4];
|
||||||
}
|
}
|
||||||
double stop=usecond();
|
double stop=usecond();
|
||||||
double time = (stop-start)/Nloop*1000;
|
double time = (stop-start)/Nloop*1000;
|
||||||
|
|
||||||
double flops=vol*Nvec*2;// mul,add
|
double flops=vol*Nvec*2;// mul,add
|
||||||
double bytes=3*vol*Nvec*sizeof(Real);
|
double bytes=3*vol*Nvec*sizeof(Real);
|
||||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking a*x + y bandwidth"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=4){
|
for(int lat=4;lat<=lmax;lat+=4){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||||
|
|
||||||
LatticeVec z(&Grid); //random(pRNG,z);
|
LatticeVec z(&Grid); //random(pRNG,z);
|
||||||
LatticeVec x(&Grid); //random(pRNG,x);
|
LatticeVec x(&Grid); //random(pRNG,x);
|
||||||
LatticeVec y(&Grid); //random(pRNG,y);
|
LatticeVec y(&Grid); //random(pRNG,y);
|
||||||
double a=2.0;
|
double a=2.0;
|
||||||
|
|
||||||
|
uint64_t Nloop=NLOOP;
|
||||||
|
|
||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int i=0;i<Nloop;i++){
|
for(int i=0;i<Nloop;i++){
|
||||||
z=a*x-y;
|
z=a*x-y;
|
||||||
|
x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away
|
||||||
|
y._odata[4]=z._odata[4];
|
||||||
}
|
}
|
||||||
double stop=usecond();
|
double stop=usecond();
|
||||||
double time = (stop-start)/Nloop*1000;
|
double time = (stop-start)/Nloop*1000;
|
||||||
|
|
||||||
double flops=vol*Nvec*2;// mul,add
|
double flops=vol*Nvec*2;// mul,add
|
||||||
double bytes=3*vol*Nvec*sizeof(Real);
|
double bytes=3*vol*Nvec*sizeof(Real);
|
||||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking SCALE bandwidth"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SCALE bandwidth"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=4;lat<=lmax;lat+=4){
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=4){
|
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
uint64_t Nloop=NLOOP;
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||||
|
|
||||||
LatticeVec z(&Grid); //random(pRNG,z);
|
LatticeVec z(&Grid); //random(pRNG,z);
|
||||||
LatticeVec x(&Grid); //random(pRNG,x);
|
LatticeVec x(&Grid); //random(pRNG,x);
|
||||||
@ -108,46 +121,47 @@ int main (int argc, char ** argv)
|
|||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int i=0;i<Nloop;i++){
|
for(int i=0;i<Nloop;i++){
|
||||||
z=a*x;
|
z=a*x;
|
||||||
|
x._odata[0]=z._odata[0]*2.0;
|
||||||
}
|
}
|
||||||
double stop=usecond();
|
double stop=usecond();
|
||||||
double time = (stop-start)/Nloop*1000;
|
double time = (stop-start)/Nloop*1000;
|
||||||
|
|
||||||
double bytes=2*vol*Nvec*sizeof(Real);
|
double bytes=2*vol*Nvec*sizeof(Real);
|
||||||
double flops=vol*Nvec*1;// mul
|
double flops=vol*Nvec*1;// mul
|
||||||
std::cout <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking READ bandwidth"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=4){
|
for(int lat=4;lat<=lmax;lat+=4){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
uint64_t Nloop=NLOOP;
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||||
|
|
||||||
LatticeVec z(&Grid); //random(pRNG,z);
|
LatticeVec z(&Grid); //random(pRNG,z);
|
||||||
LatticeVec x(&Grid); //random(pRNG,x);
|
LatticeVec x(&Grid); //random(pRNG,x);
|
||||||
LatticeVec y(&Grid); //random(pRNG,y);
|
LatticeVec y(&Grid); //random(pRNG,y);
|
||||||
RealD a=2.0;
|
RealD a=2.0;
|
||||||
ComplexD nn;
|
Real nn;
|
||||||
|
|
||||||
double start=usecond();
|
double start=usecond();
|
||||||
for(int i=0;i<Nloop;i++){
|
for(int i=0;i<Nloop;i++){
|
||||||
nn=norm2(x);
|
nn=norm2(x);
|
||||||
|
vsplat(x._odata[0]._internal[0],nn);
|
||||||
}
|
}
|
||||||
double stop=usecond();
|
double stop=usecond();
|
||||||
double time = (stop-start)/Nloop*1000;
|
double time = (stop-start)/Nloop*1000;
|
||||||
|
|
||||||
double bytes=vol*Nvec*sizeof(Real);
|
double bytes=vol*Nvec*sizeof(Real);
|
||||||
double flops=vol*Nvec*2;// mul,add
|
double flops=vol*Nvec*2;// mul,add
|
||||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,15 +14,15 @@ int main (int argc, char ** argv)
|
|||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=24;lat+=2){
|
for(int lat=2;lat<=32;lat+=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -43,18 +43,18 @@ int main (int argc, char ** argv)
|
|||||||
double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
|
double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
|
||||||
double footprint=2.0*vol*Nc*Nc*sizeof(Complex);
|
double footprint=2.0*vol*Nc*Nc*sizeof(Complex);
|
||||||
double flops=Nc*Nc*(6.0+8.0+8.0)*vol;
|
double flops=Nc*Nc*(6.0+8.0+8.0)*vol;
|
||||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<footprint<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<footprint<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking SU3xSU3 z= x*y"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x*y"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=24;lat+=2){
|
for(int lat=2;lat<=32;lat+=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -75,17 +75,17 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
double flops=Nc*Nc*(6+8+8)*vol;
|
double flops=Nc*Nc*(6+8+8)*vol;
|
||||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking SU3xSU3 mult(z,x,y)"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 mult(z,x,y)"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=24;lat+=2){
|
for(int lat=2;lat<=32;lat+=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -106,17 +106,17 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
double flops=Nc*Nc*(6+8+8)*vol;
|
double flops=Nc*Nc*(6+8+8)*vol;
|
||||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << "= Benchmarking SU3xSU3 mac(z,x,y)"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 mac(z,x,y)"<<std::endl;
|
||||||
std::cout << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
std::cout << "----------------------------------------------------------"<<std::endl;
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
for(int lat=2;lat<=24;lat+=2){
|
for(int lat=2;lat<=32;lat+=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
@ -137,7 +137,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
double flops=Nc*Nc*(8+8+8)*vol;
|
double flops=Nc*Nc*(8+8+8)*vol;
|
||||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,13 +22,16 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
|
||||||
|
|
||||||
std::vector<int> seeds({1,2,3,4});
|
std::vector<int> seeds({1,2,3,4});
|
||||||
GridParallelRNG pRNG(&Grid);
|
GridParallelRNG pRNG(&Grid);
|
||||||
@ -55,15 +58,15 @@ int main (int argc, char ** argv)
|
|||||||
for(int nn=0;nn<Nd;nn++){
|
for(int nn=0;nn<Nd;nn++){
|
||||||
random(pRNG,U[nn]);
|
random(pRNG,U[nn]);
|
||||||
if(0) {
|
if(0) {
|
||||||
if (nn==-1) { U[nn]=zero; std::cout << "zeroing gauge field in dir "<<nn<<std::endl; }
|
if (nn==-1) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
|
||||||
else { U[nn] = cone;std::cout << "unit gauge field in dir "<<nn<<std::endl; }
|
else { U[nn] = cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
|
||||||
}
|
}
|
||||||
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
{ // Naive wilson implementation
|
{ // Naive wilson implementation
|
||||||
@ -84,10 +87,10 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
WilsonFermion Dw(Umu,Grid,RBGrid,mass);
|
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
|
||||||
|
|
||||||
std::cout << "Calling Dw"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
||||||
int ncall=10000;
|
int ncall=1000;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
Dw.Dhop(src,result,0);
|
Dw.Dhop(src,result,0);
|
||||||
@ -95,12 +98,12 @@ int main (int argc, char ** argv)
|
|||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
std::cout << "Called Dw"<<std::endl;
|
std::cout<<GridLogMessage << "Called Dw"<<std::endl;
|
||||||
std::cout << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
std::cout << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
// for(int ss=0;ss<10;ss++ ){
|
// for(int ss=0;ss<10;ss++ ){
|
||||||
@ -109,7 +112,7 @@ int main (int argc, char ** argv)
|
|||||||
for(int j=0;j<Nc;j++){
|
for(int j=0;j<Nc;j++){
|
||||||
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
|
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
|
||||||
ComplexF * res_p = (ComplexF *)&result._odata[ss]()(i)(j);
|
ComplexF * res_p = (ComplexF *)&result._odata[ss]()(i)(j);
|
||||||
std::cout << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
|
std::cout<<GridLogMessage << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -133,11 +136,11 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
Dw.Dhop(src,result,1);
|
Dw.Dhop(src,result,1);
|
||||||
std::cout << "Called DwDag"<<std::endl;
|
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||||
std::cout << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
|
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
|
||||||
|
|
||||||
|
|
||||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
|
Benchmark_comms_SOURCES=Benchmark_comms.cc
|
||||||
@ -10,6 +10,10 @@ Benchmark_dwf_SOURCES=Benchmark_dwf.cc
|
|||||||
Benchmark_dwf_LDADD=-lGrid
|
Benchmark_dwf_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
|
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
|
||||||
|
Benchmark_memory_asynch_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
|
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
|
||||||
Benchmark_memory_bandwidth_LDADD=-lGrid
|
Benchmark_memory_bandwidth_LDADD=-lGrid
|
||||||
|
|
||||||
|
82
configure.ac
82
configure.ac
@ -3,7 +3,7 @@
|
|||||||
#
|
#
|
||||||
# Project Grid package
|
# Project Grid package
|
||||||
#
|
#
|
||||||
# Time-stamp: <2015-06-09 15:26:39 neo>
|
# Time-stamp: <2015-07-10 17:46:21 neo>
|
||||||
|
|
||||||
AC_PREREQ([2.63])
|
AC_PREREQ([2.63])
|
||||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
||||||
@ -11,7 +11,7 @@ AC_CANONICAL_SYSTEM
|
|||||||
AM_INIT_AUTOMAKE(subdir-objects)
|
AM_INIT_AUTOMAKE(subdir-objects)
|
||||||
AC_CONFIG_MACRO_DIR([m4])
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
AC_CONFIG_SRCDIR([lib/Grid.h])
|
AC_CONFIG_SRCDIR([lib/Grid.h])
|
||||||
AC_CONFIG_HEADERS([lib/GridConfig.h])
|
AC_CONFIG_HEADERS([lib/Config.h])
|
||||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
AC_MSG_NOTICE([
|
AC_MSG_NOTICE([
|
||||||
@ -26,10 +26,9 @@ AC_LANG(C++)
|
|||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
AC_OPENMP
|
AC_OPENMP
|
||||||
AC_PROG_RANLIB
|
AC_PROG_RANLIB
|
||||||
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
||||||
AX_EXT
|
AX_EXT
|
||||||
|
|
||||||
|
|
||||||
# Checks for libraries.
|
# Checks for libraries.
|
||||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
||||||
|
|
||||||
@ -39,6 +38,7 @@ AC_CHECK_HEADERS(mm_malloc.h)
|
|||||||
AC_CHECK_HEADERS(malloc/malloc.h)
|
AC_CHECK_HEADERS(malloc/malloc.h)
|
||||||
AC_CHECK_HEADERS(malloc.h)
|
AC_CHECK_HEADERS(malloc.h)
|
||||||
AC_CHECK_HEADERS(endian.h)
|
AC_CHECK_HEADERS(endian.h)
|
||||||
|
AC_CHECK_HEADERS(execinfo.h)
|
||||||
AC_CHECK_HEADERS(gmp.h)
|
AC_CHECK_HEADERS(gmp.h)
|
||||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
||||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
||||||
@ -56,19 +56,18 @@ echo :::::::::::::::::::::::::::::::::::::::::::
|
|||||||
|
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
|
|
||||||
AC_CHECK_LIB([gmp],[__gmpf_init],,
|
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||||
[AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||||
Please install or provide the correct path to your installation
|
#Please install or provide the correct path to your installation
|
||||||
Info at: http://www.gmplib.org)])
|
#Info at: http://www.gmplib.org)])
|
||||||
|
|
||||||
AC_CHECK_LIB([mpfr],[mpfr_init],,
|
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||||
[AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||||
Please install or provide the correct path to your installation
|
#Please install or provide the correct path to your installation
|
||||||
Info at: http://www.mpfr.org/)])
|
#Info at: http://www.mpfr.org/)])
|
||||||
|
|
||||||
|
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
|
||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
|
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
|
||||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
|
|
||||||
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
|
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
|
||||||
|
|
||||||
supported=no
|
supported=no
|
||||||
@ -92,6 +91,15 @@ case ${ac_SIMD} in
|
|||||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
AC_MSG_WARN([Your processor does not support AVX instructions])
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
AVXFMA4)
|
||||||
|
echo Configuring for AVX
|
||||||
|
AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
|
||||||
|
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
||||||
|
supported=yes
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor does not support AVX instructions])
|
||||||
|
fi
|
||||||
|
;;
|
||||||
AVX2)
|
AVX2)
|
||||||
echo Configuring for AVX2
|
echo Configuring for AVX2
|
||||||
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
|
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
|
||||||
@ -101,14 +109,19 @@ case ${ac_SIMD} in
|
|||||||
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
AVX512|MIC)
|
AVX512)
|
||||||
echo Configuring for AVX512 and MIC
|
echo Configuring for AVX512
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] )
|
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
|
||||||
supported="cross compilation"
|
supported="cross compilation"
|
||||||
;;
|
;;
|
||||||
NEONv7)
|
IMCI)
|
||||||
echo Configuring for experimental ARMv7 support
|
echo Configuring for IMCI
|
||||||
AC_DEFINE([NEONv7],[1],[NEON ARMv7 Experimental support ] )
|
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
||||||
|
supported="cross compilation"
|
||||||
|
;;
|
||||||
|
NEONv8)
|
||||||
|
echo Configuring for experimental ARMv8a support
|
||||||
|
AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
|
||||||
supported="cross compilation"
|
supported="cross compilation"
|
||||||
;;
|
;;
|
||||||
DEBUG)
|
DEBUG)
|
||||||
@ -120,6 +133,17 @@ case ${ac_SIMD} in
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||||
|
case ${ac_PRECISION} in
|
||||||
|
single)
|
||||||
|
echo default precision is single
|
||||||
|
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
||||||
|
;;
|
||||||
|
double)
|
||||||
|
echo default precision is double
|
||||||
|
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
||||||
|
|
||||||
@ -144,15 +168,15 @@ AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
|
|||||||
###################################################################
|
###################################################################
|
||||||
# Checks for doxygen support
|
# Checks for doxygen support
|
||||||
# if present enables the "make doxyfile" command
|
# if present enables the "make doxyfile" command
|
||||||
echo
|
#echo
|
||||||
echo Checking doxygen support
|
#echo Checking doxygen support
|
||||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
#echo :::::::::::::::::::::::::::::::::::::::::::
|
||||||
AC_PROG_DOXYGEN
|
#AC_PROG_DOXYGEN
|
||||||
|
|
||||||
if test -n "$DOXYGEN"
|
#if test -n "$DOXYGEN"
|
||||||
then
|
#then
|
||||||
AC_CONFIG_FILES([docs/doxy.cfg])
|
#AC_CONFIG_FILES([docs/doxy.cfg])
|
||||||
fi
|
#fi
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo Creating configuration files
|
echo Creating configuration files
|
||||||
|
@ -29,12 +29,12 @@ public:
|
|||||||
|
|
||||||
template<int N,class obj,typename std::enable_if<N==obj::NestLevel >::type * = nullptr > auto function(const obj &arg)-> obj
|
template<int N,class obj,typename std::enable_if<N==obj::NestLevel >::type * = nullptr > auto function(const obj &arg)-> obj
|
||||||
{
|
{
|
||||||
std::cout<<"Leaf "<<obj::NestLevel<<std::endl;
|
std::cout<<GridLogMessage<<"Leaf "<<obj::NestLevel<<std::endl;
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
template<int N,class obj,typename std::enable_if<N!=obj::NestLevel >::type * = nullptr > auto function(const obj &arg)-> obj
|
template<int N,class obj,typename std::enable_if<N!=obj::NestLevel >::type * = nullptr > auto function(const obj &arg)-> obj
|
||||||
{
|
{
|
||||||
std::cout<<"Node "<<obj::NestLevel<<std::endl;
|
std::cout<<GridLogMessage<<"Node "<<obj::NestLevel<<std::endl;
|
||||||
obj ret;
|
obj ret;
|
||||||
ret.internal=function<N>(arg.internal);
|
ret.internal=function<N>(arg.internal);
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#include <algorithms/SparseMatrix.h>
|
#include <algorithms/SparseMatrix.h>
|
||||||
#include <algorithms/LinearOperator.h>
|
#include <algorithms/LinearOperator.h>
|
||||||
#include <algorithms/CoarsenedMatrix.h>
|
#include <algorithms/Preconditioner.h>
|
||||||
|
|
||||||
#include <algorithms/approx/Zolotarev.h>
|
#include <algorithms/approx/Zolotarev.h>
|
||||||
#include <algorithms/approx/Chebyshev.h>
|
#include <algorithms/approx/Chebyshev.h>
|
||||||
@ -17,6 +17,12 @@
|
|||||||
|
|
||||||
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
|
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||||
|
|
||||||
|
// Lanczos support
|
||||||
|
#include <algorithms/iterative/MatrixUtils.h>
|
||||||
|
#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||||
|
|
||||||
|
#include <algorithms/CoarsenedMatrix.h>
|
||||||
|
|
||||||
// Eigen/lanczos
|
// Eigen/lanczos
|
||||||
// EigCg
|
// EigCg
|
||||||
// MCR
|
// MCR
|
||||||
|
@ -1,6 +1,13 @@
|
|||||||
#ifndef GRID_ALIGNED_ALLOCATOR_H
|
#ifndef GRID_ALIGNED_ALLOCATOR_H
|
||||||
#define GRID_ALIGNED_ALLOCATOR_H
|
#define GRID_ALIGNED_ALLOCATOR_H
|
||||||
|
|
||||||
|
#ifdef HAVE_MALLOC_MALLOC_H
|
||||||
|
#include <malloc/malloc.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_MALLOC_H
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
#include <mm_malloc.h>
|
#include <mm_malloc.h>
|
||||||
@ -65,7 +72,6 @@ operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return t
|
|||||||
|
|
||||||
template<typename _Tp> inline bool
|
template<typename _Tp> inline bool
|
||||||
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
|
|
||||||
}; // namespace Grid
|
}; // namespace Grid
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
|
/* lib/Config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
/* AVX Intrinsics */
|
/* AVX Intrinsics */
|
||||||
#undef AVX1
|
#undef AVX1
|
||||||
@ -6,9 +6,12 @@
|
|||||||
/* AVX2 Intrinsics */
|
/* AVX2 Intrinsics */
|
||||||
#undef AVX2
|
#undef AVX2
|
||||||
|
|
||||||
/* AVX512 Intrinsics for Knights Corner */
|
/* AVX512 Intrinsics for Knights Landing */
|
||||||
#undef AVX512
|
#undef AVX512
|
||||||
|
|
||||||
|
/* AVX Intrinsics with FMA4 */
|
||||||
|
#undef AVXFMA4
|
||||||
|
|
||||||
/* EMPTY_SIMD only for DEBUGGING */
|
/* EMPTY_SIMD only for DEBUGGING */
|
||||||
#undef EMPTY_SIMD
|
#undef EMPTY_SIMD
|
||||||
|
|
||||||
@ -18,6 +21,12 @@
|
|||||||
/* GRID_COMMS_NONE */
|
/* GRID_COMMS_NONE */
|
||||||
#undef GRID_COMMS_NONE
|
#undef GRID_COMMS_NONE
|
||||||
|
|
||||||
|
/* GRID_DEFAULT_PRECISION is DOUBLE */
|
||||||
|
#undef GRID_DEFAULT_PRECISION_DOUBLE
|
||||||
|
|
||||||
|
/* GRID_DEFAULT_PRECISION is SINGLE */
|
||||||
|
#undef GRID_DEFAULT_PRECISION_SINGLE
|
||||||
|
|
||||||
/* Support Altivec instructions */
|
/* Support Altivec instructions */
|
||||||
#undef HAVE_ALTIVEC
|
#undef HAVE_ALTIVEC
|
||||||
|
|
||||||
@ -27,9 +36,6 @@
|
|||||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
||||||
#undef HAVE_AVX2
|
#undef HAVE_AVX2
|
||||||
|
|
||||||
/* define if the compiler supports basic C++11 syntax */
|
|
||||||
#undef HAVE_CXX11
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
|
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
|
||||||
don't. */
|
don't. */
|
||||||
#undef HAVE_DECL_BE64TOH
|
#undef HAVE_DECL_BE64TOH
|
||||||
@ -41,6 +47,9 @@
|
|||||||
/* Define to 1 if you have the <endian.h> header file. */
|
/* Define to 1 if you have the <endian.h> header file. */
|
||||||
#undef HAVE_ENDIAN_H
|
#undef HAVE_ENDIAN_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <execinfo.h> header file. */
|
||||||
|
#undef HAVE_EXECINFO_H
|
||||||
|
|
||||||
/* Support FMA3 (Fused Multiply-Add) instructions */
|
/* Support FMA3 (Fused Multiply-Add) instructions */
|
||||||
#undef HAVE_FMA
|
#undef HAVE_FMA
|
||||||
|
|
||||||
@ -53,12 +62,6 @@
|
|||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#undef HAVE_INTTYPES_H
|
#undef HAVE_INTTYPES_H
|
||||||
|
|
||||||
/* Define to 1 if you have the `gmp' library (-lgmp). */
|
|
||||||
#undef HAVE_LIBGMP
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
|
|
||||||
#undef HAVE_LIBMPFR
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc.h> header file. */
|
/* Define to 1 if you have the <malloc.h> header file. */
|
||||||
#undef HAVE_MALLOC_H
|
#undef HAVE_MALLOC_H
|
||||||
|
|
||||||
@ -113,8 +116,11 @@
|
|||||||
/* Define to 1 if you have the <unistd.h> header file. */
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
#undef HAVE_UNISTD_H
|
#undef HAVE_UNISTD_H
|
||||||
|
|
||||||
/* NEON ARMv7 Experimental support */
|
/* IMCI Intrinsics for Knights Corner */
|
||||||
#undef NEONv7
|
#undef IMCI
|
||||||
|
|
||||||
|
/* NEON ARMv8 Experimental support */
|
||||||
|
#undef NEONv8
|
||||||
|
|
||||||
/* Name of package */
|
/* Name of package */
|
||||||
#undef PACKAGE
|
#undef PACKAGE
|
||||||
@ -131,9 +137,6 @@
|
|||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#undef PACKAGE_TARNAME
|
#undef PACKAGE_TARNAME
|
||||||
|
|
||||||
/* Define to the home page for this package. */
|
|
||||||
#undef PACKAGE_URL
|
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#undef PACKAGE_VERSION
|
#undef PACKAGE_VERSION
|
||||||
|
|
87
lib/Grid.h
87
lib/Grid.h
@ -6,92 +6,49 @@
|
|||||||
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
|
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
||||||
#ifndef GRID_H
|
#ifndef GRID_H
|
||||||
#define GRID_H
|
#define GRID_H
|
||||||
|
|
||||||
|
///////////////////
|
||||||
|
// Std C++ dependencies
|
||||||
|
///////////////////
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
#include <complex>
|
#include <complex>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <random>
|
#include <random>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <sys/time.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
#include <ctime>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
#ifndef MAX
|
///////////////////
|
||||||
#define MAX(x,y) ((x)>(y)?(x):(y))
|
// Grid headers
|
||||||
#define MIN(x,y) ((x)>(y)?(y):(x))
|
///////////////////
|
||||||
#endif
|
#include <serialisation/Serialisation.h>
|
||||||
|
#include <Config.h>
|
||||||
#define strong_inline __attribute__((always_inline)) inline
|
#include <Timer.h>
|
||||||
|
#include <Log.h>
|
||||||
#include <GridConfig.h>
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
// Tunable header includes
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#ifdef HAVE_MALLOC_MALLOC_H
|
|
||||||
#include <malloc/malloc.h>
|
|
||||||
#endif
|
|
||||||
#ifdef HAVE_MALLOC_H
|
|
||||||
#include <malloc.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <AlignedAllocator.h>
|
#include <AlignedAllocator.h>
|
||||||
|
|
||||||
#include <Simd.h>
|
#include <Simd.h>
|
||||||
#include <Threads.h>
|
#include <Threads.h>
|
||||||
|
#include <Communicator.h>
|
||||||
#include <Communicator.h> // subdir aggregate
|
#include <Cartesian.h>
|
||||||
#include <Cartesian.h> // subdir aggregate
|
#include <Tensors.h>
|
||||||
#include <Tensors.h> // subdir aggregate
|
#include <Lattice.h>
|
||||||
#include <Lattice.h> // subdir aggregate
|
#include <Cshift.h>
|
||||||
#include <Cshift.h> // subdir aggregate
|
#include <Stencil.h>
|
||||||
#include <Stencil.h> // subdir aggregate
|
#include <Algorithms.h>
|
||||||
#include <Algorithms.h>// subdir aggregate
|
|
||||||
|
|
||||||
#include <qcd/QCD.h>
|
#include <qcd/QCD.h>
|
||||||
|
#include <parallelIO/BinaryIO.h>
|
||||||
#include <parallelIO/NerscIO.h>
|
#include <parallelIO/NerscIO.h>
|
||||||
|
|
||||||
namespace Grid {
|
#include <Init.h>
|
||||||
|
|
||||||
void Grid_init(int *argc,char ***argv);
|
|
||||||
void Grid_finalize(void);
|
|
||||||
// internal, controled with --handle
|
|
||||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
|
||||||
void Grid_debug_handler_init(void);
|
|
||||||
void Grid_quiesce_nodes(void);
|
|
||||||
void Grid_unquiesce_nodes(void);
|
|
||||||
|
|
||||||
// C++11 time facilities better?
|
|
||||||
double usecond(void);
|
|
||||||
|
|
||||||
const std::vector<int> GridDefaultSimd(int dims,int nsimd);
|
|
||||||
const std::vector<int> &GridDefaultLatt(void);
|
|
||||||
const std::vector<int> &GridDefaultMpi(void);
|
|
||||||
const int &GridThreads(void) ;
|
|
||||||
void GridSetThreads(int t) ;
|
|
||||||
|
|
||||||
// Common parsing chores
|
|
||||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
|
||||||
bool GridCmdOptionExists(char** begin, char** end, const std::string& option);
|
|
||||||
std::string GridCmdVectorIntToString(const std::vector<int> & vec);
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
|
||||||
std::vector<int> &latt,
|
|
||||||
std::vector<int> &simd,
|
|
||||||
std::vector<int> &mpi);
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
169
lib/GridConfig.h
169
lib/GridConfig.h
@ -1,169 +0,0 @@
|
|||||||
/* lib/GridConfig.h. Generated from GridConfig.h.in by configure. */
|
|
||||||
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
|
|
||||||
|
|
||||||
/* AVX Intrinsics */
|
|
||||||
/* #undef AVX1 */
|
|
||||||
|
|
||||||
/* AVX2 Intrinsics */
|
|
||||||
/* #undef AVX2 */
|
|
||||||
|
|
||||||
/* AVX512 Intrinsics for Knights Corner */
|
|
||||||
/* #undef AVX512 */
|
|
||||||
|
|
||||||
/* EMPTY_SIMD only for DEBUGGING */
|
|
||||||
/* #undef EMPTY_SIMD */
|
|
||||||
|
|
||||||
/* GRID_COMMS_MPI */
|
|
||||||
/* #undef GRID_COMMS_MPI */
|
|
||||||
|
|
||||||
/* GRID_COMMS_NONE */
|
|
||||||
#define GRID_COMMS_NONE 1
|
|
||||||
|
|
||||||
/* Support Altivec instructions */
|
|
||||||
/* #undef HAVE_ALTIVEC */
|
|
||||||
|
|
||||||
/* Support AVX (Advanced Vector Extensions) instructions */
|
|
||||||
/* #undef HAVE_AVX */
|
|
||||||
|
|
||||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
|
||||||
/* #undef HAVE_AVX2 */
|
|
||||||
|
|
||||||
/* define if the compiler supports basic C++11 syntax */
|
|
||||||
/* #undef HAVE_CXX11 */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#define HAVE_DECL_BE64TOH 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't.
|
|
||||||
*/
|
|
||||||
#define HAVE_DECL_NTOHLL 0
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <endian.h> header file. */
|
|
||||||
#define HAVE_ENDIAN_H 1
|
|
||||||
|
|
||||||
/* Support FMA3 (Fused Multiply-Add) instructions */
|
|
||||||
/* #undef HAVE_FMA */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `gettimeofday' function. */
|
|
||||||
#define HAVE_GETTIMEOFDAY 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <gmp.h> header file. */
|
|
||||||
#define HAVE_GMP_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
|
||||||
#define HAVE_INTTYPES_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `gmp' library (-lgmp). */
|
|
||||||
#define HAVE_LIBGMP 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
|
|
||||||
#define HAVE_LIBMPFR 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc.h> header file. */
|
|
||||||
#define HAVE_MALLOC_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc/malloc.h> header file. */
|
|
||||||
/* #undef HAVE_MALLOC_MALLOC_H */
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <memory.h> header file. */
|
|
||||||
#define HAVE_MEMORY_H 1
|
|
||||||
|
|
||||||
/* Support mmx instructions */
|
|
||||||
#define HAVE_MMX /**/
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
|
||||||
#define HAVE_MM_MALLOC_H 1
|
|
||||||
|
|
||||||
/* Support SSE (Streaming SIMD Extensions) instructions */
|
|
||||||
#define HAVE_SSE /**/
|
|
||||||
|
|
||||||
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
|
|
||||||
#define HAVE_SSE2 /**/
|
|
||||||
|
|
||||||
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
|
|
||||||
#define HAVE_SSE3 /**/
|
|
||||||
|
|
||||||
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
|
|
||||||
#define HAVE_SSE4_1 /**/
|
|
||||||
|
|
||||||
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
|
|
||||||
#define HAVE_SSE4_2 /**/
|
|
||||||
|
|
||||||
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
|
|
||||||
#define HAVE_SSSE3 /**/
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
|
||||||
#define HAVE_STDINT_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
|
||||||
#define HAVE_STDLIB_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <strings.h> header file. */
|
|
||||||
#define HAVE_STRINGS_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <string.h> header file. */
|
|
||||||
#define HAVE_STRING_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
|
||||||
#define HAVE_SYS_STAT_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
|
||||||
#define HAVE_SYS_TYPES_H 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <unistd.h> header file. */
|
|
||||||
#define HAVE_UNISTD_H 1
|
|
||||||
|
|
||||||
/* NEON ARMv7 Experimental support */
|
|
||||||
/* #undef NEONv7 */
|
|
||||||
|
|
||||||
/* Name of package */
|
|
||||||
#define PACKAGE "grid"
|
|
||||||
|
|
||||||
/* Define to the address where bug reports for this package should be sent. */
|
|
||||||
#define PACKAGE_BUGREPORT "paboyle@ph.ed.ac.uk"
|
|
||||||
|
|
||||||
/* Define to the full name of this package. */
|
|
||||||
#define PACKAGE_NAME "Grid"
|
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
|
||||||
#define PACKAGE_STRING "Grid 1.0"
|
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
|
||||||
#define PACKAGE_TARNAME "grid"
|
|
||||||
|
|
||||||
/* Define to the home page for this package. */
|
|
||||||
#define PACKAGE_URL ""
|
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
|
||||||
#define PACKAGE_VERSION "1.0"
|
|
||||||
|
|
||||||
/* SSE4 Intrinsics */
|
|
||||||
#define SSE4 1
|
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
|
||||||
#define STDC_HEADERS 1
|
|
||||||
|
|
||||||
/* Version number of package */
|
|
||||||
#define VERSION "1.0"
|
|
||||||
|
|
||||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
|
|
||||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
|
||||||
#define below would cause a syntax error. */
|
|
||||||
/* #undef _UINT32_T */
|
|
||||||
|
|
||||||
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
|
|
||||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
|
||||||
#define below would cause a syntax error. */
|
|
||||||
/* #undef _UINT64_T */
|
|
||||||
|
|
||||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
|
||||||
/* #undef size_t */
|
|
||||||
|
|
||||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
|
|
||||||
such a type exists and the standard includes do not define it. */
|
|
||||||
/* #undef uint32_t */
|
|
||||||
|
|
||||||
/* Define to the type of an unsigned integer type of width exactly 64 bits if
|
|
||||||
such a type exists and the standard includes do not define it. */
|
|
||||||
/* #undef uint64_t */
|
|
@ -1,7 +1,6 @@
|
|||||||
/****************************************************************************/
|
/****************************************************************************/
|
||||||
/* pab: Signal magic. Processor state dump is x86-64 specific */
|
/* pab: Signal magic. Processor state dump is x86-64 specific */
|
||||||
/****************************************************************************/
|
/****************************************************************************/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
@ -16,26 +15,29 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
|
||||||
#undef __X86_64
|
#define __X86_64
|
||||||
#define MAC
|
|
||||||
|
|
||||||
#ifdef MAC
|
#ifdef HAVE_EXECINFO_H
|
||||||
#include <execinfo.h>
|
#include <execinfo.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Convenience functions to access standard command line arg
|
// Convenience functions to access standard command line arg
|
||||||
// driven parallelism controls
|
// driven parallelism controls
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
static std::vector<int> Grid_default_latt;
|
static std::vector<int> Grid_default_latt;
|
||||||
static std::vector<int> Grid_default_mpi;
|
static std::vector<int> Grid_default_mpi;
|
||||||
|
|
||||||
int GridThread::_threads;
|
int GridThread::_threads =1;
|
||||||
|
int GridThread::_hyperthreads=1;
|
||||||
|
int GridThread::_cores=1;
|
||||||
|
|
||||||
const std::vector<int> GridDefaultSimd(int dims,int nsimd)
|
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
|
||||||
{
|
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
|
||||||
|
const std::vector<int> GridDefaultSimd(int dims,int nsimd)
|
||||||
|
{
|
||||||
std::vector<int> layout(dims);
|
std::vector<int> layout(dims);
|
||||||
int nn=nsimd;
|
int nn=nsimd;
|
||||||
for(int d=dims-1;d>=0;d--){
|
for(int d=dims-1;d>=0;d--){
|
||||||
@ -48,15 +50,11 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
assert(nn==1);
|
assert(nn==1);
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
|
////////////////////////////////////////////////////////////
|
||||||
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
|
// Command line parsing assist for stock controls
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
// Command line parsing assist for stock controls
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option)
|
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option)
|
||||||
{
|
{
|
||||||
char ** itr = std::find(begin, end, option);
|
char ** itr = std::find(begin, end, option);
|
||||||
@ -70,6 +68,23 @@ bool GridCmdOptionExists(char** begin, char** end, const std::string& option)
|
|||||||
{
|
{
|
||||||
return std::find(begin, end, option) != end;
|
return std::find(begin, end, option) != end;
|
||||||
}
|
}
|
||||||
|
// Split a comma separated list into its fields.
//
//   str : the list, e.g. "Error,Warning,Message" (taken by value, not modified
//         for the caller).
//   vec : output; any previous contents are discarded and replaced by the
//         fields in order of appearance.
//
// Every comma produces a field boundary, so empty fields are kept:
// "a,,b" -> {"a","","b"}, "a," -> {"a",""}, and "" -> {""}.
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec)
{
  const char delimiter = ',';

  vec.clear();

  // Walk the string once, remembering where the current field starts, instead
  // of erasing the consumed prefix after every token.
  std::string::size_type start = 0;
  std::string::size_type comma;
  while ((comma = str.find(delimiter, start)) != std::string::npos) {
    vec.push_back(str.substr(start, comma - start));
    start = comma + 1;
  }

  // Whatever follows the last comma (possibly nothing) is the final field.
  vec.push_back(str.substr(start));
}
|
||||||
|
|
||||||
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
||||||
{
|
{
|
||||||
@ -84,6 +99,7 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
std::vector<int> &latt,
|
std::vector<int> &latt,
|
||||||
std::vector<int> &mpi)
|
std::vector<int> &mpi)
|
||||||
@ -102,13 +118,19 @@ void GridParseLayout(char **argv,int argc,
|
|||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
|
||||||
GridCmdOptionIntVector(arg,latt);
|
GridCmdOptionIntVector(arg,latt);
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--omp") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
|
||||||
std::vector<int> ompthreads(0);
|
std::vector<int> ompthreads(0);
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--omp");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
|
||||||
GridCmdOptionIntVector(arg,ompthreads);
|
GridCmdOptionIntVector(arg,ompthreads);
|
||||||
assert(ompthreads.size()==1);
|
assert(ompthreads.size()==1);
|
||||||
GridThread::SetThreads(ompthreads[0]);
|
GridThread::SetThreads(ompthreads[0]);
|
||||||
}
|
}
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
||||||
|
std::vector<int> cores(0);
|
||||||
|
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
||||||
|
GridCmdOptionIntVector(arg,cores);
|
||||||
|
GridThread::SetCores(cores[0]);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,8 +139,9 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
|||||||
std::copy(vec.begin(), vec.end(),std::ostream_iterator<int>(oss, " "));
|
std::copy(vec.begin(), vec.end(),std::ostream_iterator<int>(oss, " "));
|
||||||
return oss.str();
|
return oss.str();
|
||||||
}
|
}
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
/////////////////////////////////////////////////////////
|
//
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
void Grid_init(int *argc,char ***argv)
|
void Grid_init(int *argc,char ***argv)
|
||||||
{
|
{
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
@ -126,15 +149,33 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
#endif
|
#endif
|
||||||
// Parse command line args.
|
// Parse command line args.
|
||||||
|
|
||||||
|
GridLogger::StopWatch.Start();
|
||||||
|
|
||||||
|
std::string arg;
|
||||||
|
std::vector<std::string> logstreams;
|
||||||
|
std::string defaultLog("Error,Warning,Message,Performance");
|
||||||
|
|
||||||
|
GridCmdOptionCSL(defaultLog,logstreams);
|
||||||
|
GridLogConfigure(logstreams);
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
|
||||||
std::cout<<"--help : this message"<<std::endl;
|
std::cout<<GridLogMessage<<"--help : this message"<<std::endl;
|
||||||
std::cout<<"--debug-signals : catch sigsegv and print a blame report"<<std::endl;
|
std::cout<<GridLogMessage<<"--debug-signals : catch sigsegv and print a blame report"<<std::endl;
|
||||||
std::cout<<"--debug-stdout : print stdout from EVERY node"<<std::endl;
|
std::cout<<GridLogMessage<<"--debug-stdout : print stdout from EVERY node"<<std::endl;
|
||||||
std::cout<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
|
std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
|
||||||
std::cout<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
||||||
std::cout<<"--omp n : default number of OMP threads"<<std::endl;
|
std::cout<<GridLogMessage<<"--omp n : default number of OMP threads"<<std::endl;
|
||||||
std::cout<<"--grid n.n.n.n : default Grid size"<<std::endl;
|
std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Debug"<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
|
||||||
|
arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log");
|
||||||
|
GridCmdOptionCSL(arg,logstreams);
|
||||||
|
GridLogConfigure(logstreams);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||||
Grid_debug_handler_init();
|
Grid_debug_handler_init();
|
||||||
}
|
}
|
||||||
@ -142,48 +183,33 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
Grid_quiesce_nodes();
|
Grid_quiesce_nodes();
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
|
||||||
WilsonFermion::HandOptDslash=1;
|
QCD::WilsonFermionStatic::HandOptDslash=1;
|
||||||
WilsonFermion5D::HandOptDslash=1;
|
QCD::WilsonFermion5DStatic::HandOptDslash=1;
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
||||||
LebesgueOrder::UseLebesgueOrder=1;
|
LebesgueOrder::UseLebesgueOrder=1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
||||||
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
||||||
|
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
||||||
|
}
|
||||||
GridParseLayout(*argv,*argc,
|
GridParseLayout(*argv,*argc,
|
||||||
Grid_default_latt,
|
Grid_default_latt,
|
||||||
Grid_default_mpi);
|
Grid_default_mpi);
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
||||||
std::cout<<"Grid Decomposition\n";
|
std::cout<<GridLogMessage<<"Grid Decomposition\n";
|
||||||
std::cout<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
||||||
std::cout<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
|
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
|
||||||
std::cout<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
|
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
|
||||||
std::cout<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
|
std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
|
||||||
std::cout<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
|
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
|
||||||
std::cout<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
// Verbose limiter on MPI tasks
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
void Grid_quiesce_nodes(void)
|
|
||||||
{
|
|
||||||
#ifdef GRID_COMMS_MPI
|
|
||||||
int me;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
|
||||||
if ( me ) {
|
|
||||||
std::cout.setstate(std::ios::badbit);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
void Grid_unquiesce_nodes(void)
|
|
||||||
{
|
|
||||||
#ifdef GRID_COMMS_MPI
|
|
||||||
std::cout.clear();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void Grid_finalize(void)
|
void Grid_finalize(void)
|
||||||
{
|
{
|
||||||
@ -207,11 +233,14 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
|||||||
printf(" mem address %llx\n",(unsigned long long)si->si_addr);
|
printf(" mem address %llx\n",(unsigned long long)si->si_addr);
|
||||||
printf(" code %d\n",si->si_code);
|
printf(" code %d\n",si->si_code);
|
||||||
|
|
||||||
#ifdef __X86_64
|
// Linux/Posix
|
||||||
|
#ifdef __linux__
|
||||||
|
// And x86 64bit
|
||||||
ucontext_t * uc= (ucontext_t *)ptr;
|
ucontext_t * uc= (ucontext_t *)ptr;
|
||||||
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
||||||
printf(" instruction %llx\n",(unsigned long long)sc->rip);
|
printf(" instruction %llx\n",(unsigned long long)sc->rip);
|
||||||
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
|
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
|
||||||
|
|
||||||
REG(rdi);
|
REG(rdi);
|
||||||
REG(rsi);
|
REG(rsi);
|
||||||
REG(rbp);
|
REG(rbp);
|
||||||
@ -232,7 +261,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
|||||||
REG(r14);
|
REG(r14);
|
||||||
REG(r15);
|
REG(r15);
|
||||||
#endif
|
#endif
|
||||||
#ifdef MAC
|
#ifdef HAVE_EXECINFO_H
|
||||||
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);
|
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);
|
||||||
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);
|
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);
|
||||||
for (int i = 0; i < symbols; i++){
|
for (int i = 0; i < symbols; i++){
|
32
lib/Init.h
Normal file
32
lib/Init.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#ifndef GRID_INIT_H
#define GRID_INIT_H

// Included here so this header does not depend on what the includer happened
// to pull in first: the declarations below use siginfo_t, std::string and
// std::vector.
#include <signal.h>
#include <string>
#include <vector>

namespace Grid {

  // Library start-up / shut-down. Grid_init receives pointers to main()'s
  // argc/argv and parses the stock command line options from them.
  void Grid_init(int *argc,char ***argv);
  void Grid_finalize(void);

  // Internal signal-debugging hooks; Grid_init calls Grid_debug_handler_init()
  // when --debug-signals is present on the command line.
  void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
  void Grid_debug_handler_init(void);

  // Verbose limiter on MPI tasks: under GRID_COMMS_MPI, quiesce sets badbit on
  // std::cout for every rank other than 0 and unquiesce clears the stream
  // state again. Without GRID_COMMS_MPI both are no-ops.
  void Grid_quiesce_nodes(void);
  void Grid_unquiesce_nodes(void);

  // Defaults established from the command line.
  const std::vector<int> GridDefaultSimd(int dims,int nsimd);
  const std::vector<int> &GridDefaultLatt(void);
  const std::vector<int> &GridDefaultMpi(void);
  // NOTE(review): no definition of the next two is visible next to the other
  // Init.cc functions -- confirm they are still implemented somewhere.
  const int &GridThreads(void) ;
  void GridSetThreads(int t) ;

  // Common parsing chores
  std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
  bool        GridCmdOptionExists(char** begin, char** end, const std::string& option);
  std::string GridCmdVectorIntToString(const std::vector<int> & vec);

  // Matches the definition in Init.cc, which takes the lattice and MPI
  // layouts only.
  void GridParseLayout(char **argv,int argc,
		       std::vector<int> &latt,
		       std::vector<int> &mpi);

  // Older signature with an explicit SIMD layout, kept so existing callers
  // still see a declaration.
  // NOTE(review): Init.cc shows no definition taking a simd argument; confirm
  // before relying on this overload.
  void GridParseLayout(char **argv,int argc,
		       std::vector<int> &latt,
		       std::vector<int> &simd,
		       std::vector<int> &mpi);

};
#endif
|
62
lib/Log.cc
Normal file
62
lib/Log.cc
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
GridStopWatch Logger::StopWatch;
|
||||||
|
std::ostream Logger::devnull(0);
|
||||||
|
|
||||||
|
GridLogger GridLogError (1,"Error");
|
||||||
|
GridLogger GridLogWarning (1,"Warning");
|
||||||
|
GridLogger GridLogMessage (1,"Message");
|
||||||
|
GridLogger GridLogDebug (1,"Debug");
|
||||||
|
GridLogger GridLogPerformance(1,"Performance");
|
||||||
|
GridLogger GridLogIterative (1,"Iterative");
|
||||||
|
|
||||||
|
void GridLogConfigure(std::vector<std::string> &logstreams)
|
||||||
|
{
|
||||||
|
GridLogError.Active(0);
|
||||||
|
GridLogWarning.Active(0);
|
||||||
|
GridLogMessage.Active(0);
|
||||||
|
GridLogIterative.Active(0);
|
||||||
|
GridLogDebug.Active(0);
|
||||||
|
GridLogPerformance.Active(0);
|
||||||
|
|
||||||
|
for(int i=0;i<logstreams.size();i++){
|
||||||
|
if ( logstreams[i]== std::string("Error") ) GridLogError.Active(1);
|
||||||
|
if ( logstreams[i]== std::string("Warning") ) GridLogWarning.Active(1);
|
||||||
|
if ( logstreams[i]== std::string("Message") ) GridLogMessage.Active(1);
|
||||||
|
if ( logstreams[i]== std::string("Iterative") ) GridLogIterative.Active(1);
|
||||||
|
if ( logstreams[i]== std::string("Debug") ) GridLogDebug.Active(1);
|
||||||
|
if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Verbose limiter on MPI tasks
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Silence std::cout on every MPI rank other than rank 0 by setting the
// stream's badbit. Does nothing unless GRID_COMMS_MPI is defined.
void Grid_quiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  if ( rank != 0 ) std::cout.setstate(std::ios::badbit);
#endif
}
|
||||||
|
|
||||||
|
// Undo Grid_quiesce_nodes(): reset std::cout's error state so output is
// accepted again. Does nothing unless GRID_COMMS_MPI is defined.
void Grid_unquiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI
  std::cout.clear();
#endif
}
|
||||||
|
|
||||||
|
std::ostream& operator<< (std::ostream& stream, const GridTime& time)
|
||||||
|
{
|
||||||
|
stream << time.count()<<" ms";
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
54
lib/Log.h
Normal file
54
lib/Log.h
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
#ifndef GRID_LOG_H
|
||||||
|
#define GRID_LOG_H
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||||
|
|
||||||
|
std::ostream& operator<< (std::ostream& stream, const GridTime& time);
|
||||||
|
|
||||||
|
class Logger {
|
||||||
|
protected:
|
||||||
|
int active;
|
||||||
|
std::string name, topName;
|
||||||
|
public:
|
||||||
|
static GridStopWatch StopWatch;
|
||||||
|
static std::ostream devnull;
|
||||||
|
|
||||||
|
Logger(std::string topNm, int on, std::string nm)
|
||||||
|
: active(on), name(nm), topName(topNm) {};
|
||||||
|
|
||||||
|
void Active(int on) {active = on;};
|
||||||
|
int isActive(void) {return active;};
|
||||||
|
|
||||||
|
friend std::ostream& operator<< (std::ostream& stream, const Logger& log){
|
||||||
|
if ( log.active ) {
|
||||||
|
StopWatch.Stop();
|
||||||
|
GridTime now = StopWatch.Elapsed();
|
||||||
|
StopWatch.Start();
|
||||||
|
stream << std::setw(8) << std::left << log.topName << " : ";
|
||||||
|
stream << std::setw(12) << std::left << log.name << " : ";
|
||||||
|
stream << now << " : ";
|
||||||
|
return stream;
|
||||||
|
} else {
|
||||||
|
return devnull;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class GridLogger: public Logger {
|
||||||
|
public:
|
||||||
|
GridLogger(int on, std::string nm): Logger("Grid", on, nm){};
|
||||||
|
};
|
||||||
|
|
||||||
|
void GridLogConfigure(std::vector<std::string> &logstreams);
|
||||||
|
|
||||||
|
extern GridLogger GridLogError;
|
||||||
|
extern GridLogger GridLogWarning;
|
||||||
|
extern GridLogger GridLogMessage;
|
||||||
|
extern GridLogger GridLogDebug ;
|
||||||
|
extern GridLogger GridLogPerformance;
|
||||||
|
extern GridLogger GridLogIterative ;
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -1,4 +1,4 @@
|
|||||||
|
|
||||||
HFILES=./Cshift.h ./simd/Grid_avx.h ./simd/Grid_vector_types.h ./simd/Grid_sse4.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_vector_unops.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./Tensors.h ./Algorithms.h ./communicator/Communicator_base.h ./lattice/Lattice_rng.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transfer.h ./lattice/Lattice_unary.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_comparison.h ./lattice/Lattice_overload.h ./lattice/Lattice_reality.h ./lattice/Lattice_local.h ./lattice/Lattice_conformable.h ./lattice/Lattice_where.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_ET.h ./lattice/Lattice_transpose.h ./lattice/Lattice_trace.h ./Stencil.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_poke.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_class.h ./tensors/Tensor_logical.h ./tensors/Tensor_transpose.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_inner.h ./tensors/Tensor_traits.h ./tensors/Tensor_Ta.h ./tensors/Tensor_unary.h ./tensors/Tensor_determinant.h ./tensors/Tensor_peek.h ./tensors/Tensor_arith.h ./tensors/Tensor_extract_merge.h ./Communicator.h ./Cartesian.h ./parallelIO/NerscIO.h ./qcd/QCD.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/LinalgUtils.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonCompressor.h 
./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/spin/TwoSpinor.h ./qcd/spin/Dirac.h ./cshift/Cshift_common.h ./cshift/Cshift_none.h ./cshift/Cshift_mpi.h ./Simd.h ./GridConfig.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_full.h ./AlignedAllocator.h ./Lattice.h ./Threads.h ./Grid.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Remez.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./algorithms/CoarsenedMatrix.h ./stencil/Lebesgue.h
|
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Config.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h 
./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h 
./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
|
||||||
|
|
||||||
CCFILES=./qcd/utils/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/spin/Dirac.cc ./GridInit.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
||||||
|
@ -11,7 +11,7 @@ namespace Grid {
|
|||||||
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
|
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
|
||||||
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
|
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
|
||||||
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
|
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
|
||||||
|
#if 0
|
||||||
// Scalar peek, no indices
|
// Scalar peek, no indices
|
||||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||||
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
|
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
|
||||||
@ -88,6 +88,7 @@ template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::T
|
|||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// matrix
|
// matrix
|
||||||
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||||
auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
|
auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
|
||||||
@ -119,6 +120,7 @@ template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::T
|
|||||||
}}
|
}}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
@ -5,7 +5,7 @@ namespace Grid {
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Poke a specific index;
|
// Poke a specific index;
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
#if 0
|
||||||
// Scalar poke
|
// Scalar poke
|
||||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||||
void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
|
void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
|
||||||
@ -18,7 +18,7 @@ template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::Te
|
|||||||
{
|
{
|
||||||
ret._internal[i] = arg._internal;
|
ret._internal[i] = arg._internal;
|
||||||
}
|
}
|
||||||
// Vector poke, two indices
|
//Matrix poke, two indices
|
||||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||||
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
|
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
|
||||||
{
|
{
|
||||||
@ -31,7 +31,6 @@ template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::Te
|
|||||||
// scalar
|
// scalar
|
||||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||||
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
|
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
|
||||||
|
|
||||||
{
|
{
|
||||||
pokeIndex<Level>(ret._internal,arg._internal);
|
pokeIndex<Level>(ret._internal,arg._internal);
|
||||||
}
|
}
|
||||||
@ -95,7 +94,7 @@ template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::Te
|
|||||||
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
|
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
28
lib/PerfCount.cc
Normal file
28
lib/PerfCount.cc
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
#include <Grid.h>
|
||||||
|
#include <PerfCount.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
||||||
|
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
||||||
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." },
|
||||||
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." },
|
||||||
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES , "CACHE_REFERENCES..." },
|
||||||
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES , "CACHE_MISSES......." },
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS) , "L1D_READ_MISS......"},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS) , "L1D_READ_ACCESS...."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS) , "L1D_WRITE_MISS....."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS) , "L1D_WRITE_ACCESS..."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS) , "L1D_PREFETCH_MISS.."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS"},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS) , "LL_READ_MISS......."},
|
||||||
|
// { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS) , "LL_READ_ACCESS....."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS) , "LL_WRITE_MISS......"},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS) , "LL_WRITE_ACCESS...."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS) , "LL_PREFETCH_MISS..."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS) , "LL_PREFETCH_ACCESS."},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS) , "L1I_READ_MISS......"},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS) , "L1I_READ_ACCESS...."}
|
||||||
|
// { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
|
||||||
|
};
|
||||||
|
}
|
157
lib/PerfCount.h
Normal file
157
lib/PerfCount.h
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
#ifndef GRID_PERFCOUNT_H
|
||||||
|
#define GRID_PERFCOUNT_H
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <ctime>
|
||||||
|
#include <chrono>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
#include <syscall.h>
|
||||||
|
#include <linux/perf_event.h>
|
||||||
|
#endif
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
// Thin wrapper that issues the perf_event_open system call directly through
// syscall(). The syscall result is narrowed to int and then returned as long.
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
  const int result = syscall(__NR_perf_event_open, hw_event, pid, cpu,
                             group_fd, flags);
  return result;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
class PerformanceCounter {
|
||||||
|
private:
|
||||||
|
typedef struct {
|
||||||
|
public:
|
||||||
|
uint32_t type;
|
||||||
|
uint64_t config;
|
||||||
|
const char *name;
|
||||||
|
} PerformanceCounterConfig;
|
||||||
|
|
||||||
|
static const PerformanceCounterConfig PerformanceCounterConfigs [];
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum PerformanceCounterType {
|
||||||
|
CPUCYCLES=0,
|
||||||
|
INSTRUCTIONS,
|
||||||
|
// STALL_CYCLES,
|
||||||
|
CACHE_REFERENCES,
|
||||||
|
CACHE_MISSES,
|
||||||
|
L1D_READ_MISS,
|
||||||
|
L1D_READ_ACCESS,
|
||||||
|
L1D_WRITE_MISS,
|
||||||
|
L1D_WRITE_ACCESS,
|
||||||
|
L1D_PREFETCH_MISS,
|
||||||
|
L1D_PREFETCH_ACCESS,
|
||||||
|
LL_READ_MISS,
|
||||||
|
// LL_READ_ACCESS,
|
||||||
|
LL_WRITE_MISS,
|
||||||
|
LL_WRITE_ACCESS,
|
||||||
|
LL_PREFETCH_MISS,
|
||||||
|
LL_PREFETCH_ACCESS,
|
||||||
|
L1I_READ_MISS,
|
||||||
|
L1I_READ_ACCESS,
|
||||||
|
PERFORMANCE_COUNTER_NUM_TYPES
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
int PCT;
|
||||||
|
|
||||||
|
long long count;
|
||||||
|
int fd;
|
||||||
|
uint64_t elapsed;
|
||||||
|
uint64_t begin;
|
||||||
|
|
||||||
|
static int NumTypes(void){
|
||||||
|
return PERFORMANCE_COUNTER_NUM_TYPES;
|
||||||
|
}
|
||||||
|
|
||||||
|
PerformanceCounter(int _pct) {
|
||||||
|
#ifdef __linux__
|
||||||
|
assert(_pct>=0);
|
||||||
|
assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
|
||||||
|
fd=-1;
|
||||||
|
count=0;
|
||||||
|
PCT =_pct;
|
||||||
|
Open();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void Open(void)
|
||||||
|
{
|
||||||
|
#ifdef __linux__
|
||||||
|
struct perf_event_attr pe;
|
||||||
|
memset(&pe, 0, sizeof(struct perf_event_attr));
|
||||||
|
pe.size = sizeof(struct perf_event_attr);
|
||||||
|
|
||||||
|
pe.disabled = 1;
|
||||||
|
pe.exclude_kernel = 1;
|
||||||
|
pe.exclude_hv = 1;
|
||||||
|
pe.inherit = 1;
|
||||||
|
|
||||||
|
pe.type = PerformanceCounterConfigs[PCT].type;
|
||||||
|
pe.config= PerformanceCounterConfigs[PCT].config;
|
||||||
|
const char * name = PerformanceCounterConfigs[PCT].name;
|
||||||
|
fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||||
|
if (fd == -1) {
|
||||||
|
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
|
||||||
|
perror("Error is");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void Start(void)
|
||||||
|
{
|
||||||
|
#ifdef __linux__
|
||||||
|
if ( fd!= -1) {
|
||||||
|
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
|
||||||
|
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||||
|
}
|
||||||
|
begin =__rdtsc();
|
||||||
|
#else
|
||||||
|
begin = 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void Stop(void) {
|
||||||
|
count=0;
|
||||||
|
#ifdef __linux__
|
||||||
|
if ( fd!= -1) {
|
||||||
|
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
||||||
|
::read(fd, &count, sizeof(long long));
|
||||||
|
}
|
||||||
|
elapsed = __rdtsc() - begin;
|
||||||
|
#else
|
||||||
|
elapsed = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
void Report(void) {
|
||||||
|
#ifdef __linux__
|
||||||
|
printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count);
|
||||||
|
#else
|
||||||
|
printf("%llu cycles \n", elapsed );
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
~PerformanceCounter()
|
||||||
|
{
|
||||||
|
#ifdef __linux__
|
||||||
|
close(fd);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -13,6 +13,11 @@
|
|||||||
|
|
||||||
typedef uint32_t Integer;
|
typedef uint32_t Integer;
|
||||||
|
|
||||||
|
// Pack small selector fields into one immediate value, most significant field
// first (presumably for use as SIMD shuffle/permute immediates -- confirm at the
// call sites).
//  _MM_SELECT_FOUR_FOUR : four 2-bit fields,  A in bits 7:6 ... D in bits 1:0
//  _MM_SELECT_EIGHT_TWO : eight 1-bit fields, A in bit 7    ... H in bit 0
//  _MM_SELECT_FOUR_TWO  : four 1-bit fields in bits 3:0
//  _MM_SELECT_TWO_TWO   : two 1-bit fields in bits 1:0
// The macro name must be followed directly by '(' for a function-like macro,
// and each argument is parenthesised so expressions can be passed safely.
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) (((A)<<6)|((B)<<4)|((C)<<2)|(D))
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) (((A)<<7)|((B)<<6)|((C)<<5)|((D)<<4)|((E)<<3)|((F)<<2)|((G)<<1)|(H))
#define _MM_SELECT_FOUR_TWO(A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)
#define _MM_SELECT_TWO_TWO(A,B) _MM_SELECT_FOUR_TWO(0,0,A,B)
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
typedef float RealF;
|
typedef float RealF;
|
||||||
|
417
lib/Stencil.h
417
lib/Stencil.h
@ -41,11 +41,21 @@
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
// One stencil table entry. CartesianStencil keeps one of these per
// (stencil point, outer site) pair and hands out pointers to them via GetEntry().
struct StencilEntry {
  int _offset;            // index of the neighbour (presumably into the lattice or a comms buffer, depending on _is_local)
  int _is_local;          // flag: neighbour is held locally
  int _permute;           // flag: a permute is required for this neighbour
  int _around_the_world;  // flag: the shift wrapped around the lattice boundary
};
|
||||||
|
|
||||||
|
template<class vobj,class cobj, class compressor>
|
||||||
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
|
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
|
||||||
public:
|
public:
|
||||||
|
|
||||||
typedef uint32_t StencilInteger;
|
typedef uint32_t StencilInteger;
|
||||||
|
typedef typename cobj::vector_type vector_type;
|
||||||
|
typedef typename cobj::scalar_type scalar_type;
|
||||||
|
typedef typename cobj::scalar_object scalar_object;
|
||||||
|
|
||||||
int _checkerboard;
|
int _checkerboard;
|
||||||
int _npoints; // Move to template param?
|
int _npoints; // Move to template param?
|
||||||
@ -58,35 +68,336 @@ namespace Grid {
|
|||||||
std::vector<int> _permute_type;
|
std::vector<int> _permute_type;
|
||||||
|
|
||||||
// npoints x Osites() of these
|
// npoints x Osites() of these
|
||||||
std::vector<std::vector<int> > _offsets;
|
std::vector<std::vector<StencilEntry> > _entries;
|
||||||
std::vector<std::vector<int> > _is_local;
|
|
||||||
std::vector<std::vector<int> > _permute;
|
// Comms buffers
|
||||||
|
std::vector<std::vector<scalar_object> > send_buf_extract;
|
||||||
|
std::vector<std::vector<scalar_object> > recv_buf_extract;
|
||||||
|
std::vector<scalar_object *> pointers;
|
||||||
|
std::vector<scalar_object *> rpointers;
|
||||||
|
Vector<cobj> send_buf;
|
||||||
|
|
||||||
|
inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point][osite]; }
|
||||||
|
|
||||||
int _unified_buffer_size;
|
int _unified_buffer_size;
|
||||||
int _request_count;
|
int _request_count;
|
||||||
|
|
||||||
|
double buftime;
|
||||||
|
double gathertime;
|
||||||
|
double commtime;
|
||||||
|
double commstime;
|
||||||
|
double halotime;
|
||||||
|
double scattertime;
|
||||||
|
double mergetime;
|
||||||
|
double gathermtime;
|
||||||
|
double splicetime;
|
||||||
|
double nosplicetime;
|
||||||
|
|
||||||
CartesianStencil(GridBase *grid,
|
|
||||||
int npoints,
|
|
||||||
int checkerboard,
|
|
||||||
const std::vector<int> &directions,
|
CartesianStencil(GridBase *grid,
|
||||||
const std::vector<int> &distances);
|
int npoints,
|
||||||
|
int checkerboard,
|
||||||
|
const std::vector<int> &directions,
|
||||||
|
const std::vector<int> &distances)
|
||||||
|
: _entries(npoints), _permute_type(npoints), _comm_buf_size(npoints)
|
||||||
|
{
|
||||||
|
gathertime=0;
|
||||||
|
commtime=0;
|
||||||
|
commstime=0;
|
||||||
|
halotime=0;
|
||||||
|
scattertime=0;
|
||||||
|
mergetime=0;
|
||||||
|
gathermtime=0;
|
||||||
|
buftime=0;
|
||||||
|
splicetime=0;
|
||||||
|
nosplicetime=0;
|
||||||
|
|
||||||
|
_npoints = npoints;
|
||||||
|
_grid = grid;
|
||||||
|
_directions = directions;
|
||||||
|
_distances = distances;
|
||||||
|
_unified_buffer_size=0;
|
||||||
|
_request_count =0;
|
||||||
|
|
||||||
|
int osites = _grid->oSites();
|
||||||
|
|
||||||
|
for(int i=0;i<npoints;i++){
|
||||||
|
|
||||||
|
int point = i;
|
||||||
|
|
||||||
|
_entries[i].resize( osites);
|
||||||
|
|
||||||
|
int dimension = directions[i];
|
||||||
|
int displacement = distances[i];
|
||||||
|
int shift = displacement;
|
||||||
|
|
||||||
|
int fd = _grid->_fdimensions[dimension];
|
||||||
|
int rd = _grid->_rdimensions[dimension];
|
||||||
|
_permute_type[point]=_grid->PermuteType(dimension);
|
||||||
|
|
||||||
|
_checkerboard = checkerboard;
|
||||||
|
|
||||||
|
// the permute type
|
||||||
|
int simd_layout = _grid->_simd_layout[dimension];
|
||||||
|
int comm_dim = _grid->_processors[dimension] >1 ;
|
||||||
|
int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim);
|
||||||
|
|
||||||
|
int sshift[2];
|
||||||
|
|
||||||
|
// Underlying approach. For each local site build
|
||||||
|
// up a table containing the npoint "neighbours" and whether they
|
||||||
|
// live in lattice or a comms buffer.
|
||||||
|
if ( !comm_dim ) {
|
||||||
|
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
|
||||||
|
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
||||||
|
|
||||||
|
if ( sshift[0] == sshift[1] ) {
|
||||||
|
Local(point,dimension,shift,0x3);
|
||||||
|
} else {
|
||||||
|
Local(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||||
|
Local(point,dimension,shift,0x2);// both with block stride loop iteration
|
||||||
|
}
|
||||||
|
} else { // All permute extract done in comms phase prior to Stencil application
|
||||||
|
// So tables are the same whether comm_dim or splice_dim
|
||||||
|
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
|
||||||
|
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
||||||
|
if ( sshift[0] == sshift[1] ) {
|
||||||
|
Comms(point,dimension,shift,0x3);
|
||||||
|
// std::cout<<"Comms 0x3"<<std::endl;
|
||||||
|
} else {
|
||||||
|
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||||
|
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
|
||||||
|
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// for(int ss=0;ss<osites;ss++){
|
||||||
|
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
|
||||||
|
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void Local (int point, int dimension,int shiftpm,int cbmask)
|
||||||
|
{
|
||||||
|
int fd = _grid->_fdimensions[dimension];
|
||||||
|
int rd = _grid->_rdimensions[dimension];
|
||||||
|
int ld = _grid->_ldimensions[dimension];
|
||||||
|
int gd = _grid->_gdimensions[dimension];
|
||||||
|
|
||||||
|
// Map to always positive shift modulo global full dimension.
|
||||||
|
int shift = (shiftpm+fd)%fd;
|
||||||
|
|
||||||
|
// the permute type
|
||||||
|
int permute_dim =_grid->PermuteDim(dimension);
|
||||||
|
|
||||||
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
|
int o = 0;
|
||||||
|
int bo = x * _grid->_ostride[dimension];
|
||||||
|
|
||||||
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
|
|
||||||
|
int sshift = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
|
||||||
|
int sx = (x+sshift)%rd;
|
||||||
|
|
||||||
|
int wraparound=0;
|
||||||
|
if ( (shiftpm==-1) && (sx>x) ) {
|
||||||
|
wraparound = 1;
|
||||||
|
}
|
||||||
|
if ( (shiftpm== 1) && (sx<x) ) {
|
||||||
|
wraparound = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int permute_slice=0;
|
||||||
|
if(permute_dim){
|
||||||
|
int wrap = sshift/rd;
|
||||||
|
int num = sshift%rd;
|
||||||
|
if ( x< rd-num ) permute_slice=wrap;
|
||||||
|
else permute_slice = 1-wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Comms (int point,int dimension,int shiftpm,int cbmask)
|
||||||
|
{
|
||||||
|
GridBase *grid=_grid;
|
||||||
|
|
||||||
|
int fd = _grid->_fdimensions[dimension];
|
||||||
|
int ld = _grid->_ldimensions[dimension];
|
||||||
|
int rd = _grid->_rdimensions[dimension];
|
||||||
|
int pd = _grid->_processors[dimension];
|
||||||
|
int simd_layout = _grid->_simd_layout[dimension];
|
||||||
|
int comm_dim = _grid->_processors[dimension] >1 ;
|
||||||
|
|
||||||
|
// assert(simd_layout==1); // Why?
|
||||||
|
assert(comm_dim==1);
|
||||||
|
int shift = (shiftpm + fd) %fd;
|
||||||
|
assert(shift>=0);
|
||||||
|
assert(shift<fd);
|
||||||
|
|
||||||
|
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||||
|
_comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
|
||||||
|
// send to one or more remote nodes.
|
||||||
|
|
||||||
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
|
int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
|
||||||
|
|
||||||
|
|
||||||
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
|
int sx = (x+sshift)%rd;
|
||||||
|
int comm_proc = ((x+sshift)/rd)%pd;
|
||||||
|
int offnode = (comm_proc!= 0);
|
||||||
|
|
||||||
|
// std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
|
||||||
|
int wraparound=0;
|
||||||
|
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
|
||||||
|
wraparound = 1;
|
||||||
|
}
|
||||||
|
if ( (shiftpm== 1) && (sx<x) && (grid->_processor_coor[dimension]==grid->_processors[dimension]-1) ) {
|
||||||
|
wraparound = 1;
|
||||||
|
}
|
||||||
|
if (!offnode) {
|
||||||
|
|
||||||
|
int permute_slice=0;
|
||||||
|
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int words = buffer_size;
|
||||||
|
if (cbmask != 0x3) words=words>>1;
|
||||||
|
|
||||||
|
// GatherPlaneSimple (point,dimension,sx,cbmask);
|
||||||
|
|
||||||
|
int rank = grid->_processor;
|
||||||
|
int recv_from_rank;
|
||||||
|
int xmit_to_rank;
|
||||||
|
|
||||||
|
int unified_buffer_offset = _unified_buffer_size;
|
||||||
|
_unified_buffer_size += words;
|
||||||
|
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Routine builds up integer table for each site in _offsets, _is_local, _permute
|
||||||
|
void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap)
|
||||||
|
{
|
||||||
|
int rd = _grid->_rdimensions[dimension];
|
||||||
|
|
||||||
|
if ( !_grid->CheckerBoarded(dimension) ) {
|
||||||
|
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int lo = lplane*_grid->_ostride[dimension]; // offset in buffer
|
||||||
|
|
||||||
|
// Simple block stride gather of SIMD objects
|
||||||
|
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<_grid->_slice_block[dimension];b++){
|
||||||
|
_entries[point][lo+o+b]._offset =ro+o+b;
|
||||||
|
_entries[point][lo+o+b]._is_local=1;
|
||||||
|
_entries[point][lo+o+b]._permute=permute;
|
||||||
|
_entries[point][lo+o+b]._around_the_world=wrap;
|
||||||
|
}
|
||||||
|
o +=_grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int lo = lplane*_grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
|
||||||
|
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<_grid->_slice_block[dimension];b++){
|
||||||
|
|
||||||
|
int ocb=1<<_grid->CheckerBoardFromOindex(o+b);
|
||||||
|
|
||||||
|
if ( ocb&cbmask ) {
|
||||||
|
_entries[point][lo+o+b]._offset =ro+o+b;
|
||||||
|
_entries[point][lo+o+b]._is_local=1;
|
||||||
|
_entries[point][lo+o+b]._permute=permute;
|
||||||
|
_entries[point][lo+o+b]._around_the_world=wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
o +=_grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Routine builds up integer table for each site in _offsets, _is_local, _permute
|
||||||
|
void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset, int wrap)
|
||||||
|
{
|
||||||
|
int rd = _grid->_rdimensions[dimension];
|
||||||
|
|
||||||
|
if ( !_grid->CheckerBoarded(dimension) ) {
|
||||||
|
|
||||||
|
int so = plane*_grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
int bo = 0; // offset in buffer
|
||||||
|
|
||||||
|
// Simple block stride gather of SIMD objects
|
||||||
|
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<_grid->_slice_block[dimension];b++){
|
||||||
|
_entries[point][so+o+b]._offset =offset+(bo++);
|
||||||
|
_entries[point][so+o+b]._is_local=0;
|
||||||
|
_entries[point][so+o+b]._permute=0;
|
||||||
|
_entries[point][so+o+b]._around_the_world=wrap;
|
||||||
|
}
|
||||||
|
o +=_grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int so = plane*_grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
int bo = 0; // offset in buffer
|
||||||
|
|
||||||
|
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<_grid->_slice_block[dimension];b++){
|
||||||
|
|
||||||
|
int ocb=1<<_grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||||
|
if ( ocb & cbmask ) {
|
||||||
|
_entries[point][so+o+b]._offset =offset+(bo++);
|
||||||
|
_entries[point][so+o+b]._is_local=0;
|
||||||
|
_entries[point][so+o+b]._permute =0;
|
||||||
|
_entries[point][so+o+b]._around_the_world=wrap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
o +=_grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CartesianStencil(GridBase *grid,
|
||||||
|
// int npoints,
|
||||||
|
// int checkerboard,
|
||||||
|
// const std::vector<int> &directions,
|
||||||
|
// const std::vector<int> &distances);
|
||||||
|
|
||||||
|
|
||||||
// Add to tables for various cases; is this mistaken. only local if 1 proc in dim
|
// Add to tables for various cases; is this mistaken. only local if 1 proc in dim
|
||||||
// Can this be avoided with simpler coding of comms?
|
// Can this be avoided with simpler coding of comms?
|
||||||
void Local (int point, int dimension,int shift,int cbmask);
|
// void Local (int point, int dimension,int shift,int cbmask);
|
||||||
void Comms (int point, int dimension,int shift,int cbmask);
|
// void Comms (int point, int dimension,int shift,int cbmask);
|
||||||
void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute);
|
// void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap);
|
||||||
void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset);
|
// void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset,int wrap);
|
||||||
|
|
||||||
// Could allow a functional munging of the halo to another type during the comms.
|
// Could allow a functional munging of the halo to another type during the comms.
|
||||||
// this could implement the 16bit/32bit/64bit compression.
|
// this could implement the 16bit/32bit/64bit compression.
|
||||||
template<class vobj,class cobj, class compressor> void
|
void HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
|
||||||
HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
|
|
||||||
{
|
{
|
||||||
// conformable(source._grid,_grid);
|
// conformable(source._grid,_grid);
|
||||||
assert(source._grid==_grid);
|
assert(source._grid==_grid);
|
||||||
|
halotime-=usecond();
|
||||||
if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size);
|
if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size);
|
||||||
int u_comm_offset=0;
|
int u_comm_offset=0;
|
||||||
|
|
||||||
@ -120,24 +431,33 @@ namespace Grid {
|
|||||||
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
if (splice_dim) {
|
if (splice_dim) {
|
||||||
|
splicetime-=usecond();
|
||||||
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
|
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
|
||||||
|
splicetime+=usecond();
|
||||||
} else {
|
} else {
|
||||||
|
nosplicetime-=usecond();
|
||||||
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
|
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
|
||||||
|
nosplicetime+=usecond();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||||
if(splice_dim){
|
if(splice_dim){
|
||||||
|
splicetime-=usecond();
|
||||||
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
|
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
|
||||||
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
|
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
|
||||||
|
splicetime+=usecond();
|
||||||
} else {
|
} else {
|
||||||
|
nosplicetime-=usecond();
|
||||||
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
|
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
|
||||||
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
|
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
|
||||||
|
nosplicetime+=usecond();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
halotime+=usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj,class cobj, class compressor>
|
|
||||||
void GatherStartComms(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
void GatherStartComms(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||||
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
||||||
int &u_comm_offset,compressor & compress)
|
int &u_comm_offset,compressor & compress)
|
||||||
@ -158,28 +478,29 @@ namespace Grid {
|
|||||||
assert(comm_dim==1);
|
assert(comm_dim==1);
|
||||||
assert(shift>=0);
|
assert(shift>=0);
|
||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||||
|
|
||||||
std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
|
if(send_buf.size()<buffer_size) send_buf.resize(buffer_size);
|
||||||
std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
|
|
||||||
|
|
||||||
int cb= (cbmask==0x2)? Odd : Even;
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||||
|
|
||||||
for(int x=0;x<rd;x++){
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
int comm_proc = ((x+sshift)/rd)%pd;
|
int comm_proc = ((x+sshift)/rd)%pd;
|
||||||
|
|
||||||
if (comm_proc) {
|
if (comm_proc) {
|
||||||
|
|
||||||
int words = send_buf.size();
|
int words = buffer_size;
|
||||||
if (cbmask != 0x3) words=words>>1;
|
if (cbmask != 0x3) words=words>>1;
|
||||||
|
|
||||||
int bytes = words * sizeof(cobj);
|
int bytes = words * sizeof(cobj);
|
||||||
|
|
||||||
|
gathertime-=usecond();
|
||||||
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);
|
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);
|
||||||
|
gathertime+=usecond();
|
||||||
|
|
||||||
int rank = _grid->_processor;
|
int rank = _grid->_processor;
|
||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
@ -189,31 +510,27 @@ namespace Grid {
|
|||||||
assert (recv_from_rank != _grid->ThisRank());
|
assert (recv_from_rank != _grid->ThisRank());
|
||||||
|
|
||||||
// FIXME Implement asynchronous send & also avoid buffer copy
|
// FIXME Implement asynchronous send & also avoid buffer copy
|
||||||
|
commtime-=usecond();
|
||||||
_grid->SendToRecvFrom((void *)&send_buf[0],
|
_grid->SendToRecvFrom((void *)&send_buf[0],
|
||||||
xmit_to_rank,
|
xmit_to_rank,
|
||||||
(void *)&recv_buf[0],
|
(void *)&u_comm_buf[u_comm_offset],
|
||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
|
commtime+=usecond();
|
||||||
|
|
||||||
for(int i=0;i<buffer_size;i++){
|
u_comm_offset+=words;
|
||||||
u_comm_buf[u_comm_offset+i]=recv_buf[i];
|
|
||||||
}
|
|
||||||
u_comm_offset+=buffer_size;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj,class cobj, class compressor>
|
|
||||||
void GatherStartCommsSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
void GatherStartCommsSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||||
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
||||||
int &u_comm_offset,compressor &compress)
|
int &u_comm_offset,compressor &compress)
|
||||||
{
|
{
|
||||||
|
buftime-=usecond();
|
||||||
const int Nsimd = _grid->Nsimd();
|
const int Nsimd = _grid->Nsimd();
|
||||||
|
|
||||||
typedef typename cobj::vector_type vector_type;
|
|
||||||
typedef typename cobj::scalar_type scalar_type;
|
|
||||||
typedef typename cobj::scalar_object scalar_object;
|
|
||||||
|
|
||||||
int fd = _grid->_fdimensions[dimension];
|
int fd = _grid->_fdimensions[dimension];
|
||||||
int rd = _grid->_rdimensions[dimension];
|
int rd = _grid->_rdimensions[dimension];
|
||||||
@ -235,17 +552,23 @@ namespace Grid {
|
|||||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||||
int words = sizeof(cobj)/sizeof(vector_type);
|
int words = sizeof(cobj)/sizeof(vector_type);
|
||||||
|
|
||||||
/*
|
assert(cbmask==0x3); // Fixme think there is a latent bug if not true
|
||||||
* possibly slow to allocate
|
|
||||||
* Doesn't matter in this test, but may want to preallocate in the
|
|
||||||
* dirac operators
|
|
||||||
*/
|
|
||||||
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
|
||||||
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
|
||||||
int bytes = buffer_size*sizeof(scalar_object);
|
|
||||||
|
|
||||||
std::vector<scalar_object *> pointers(Nsimd); //
|
// Should grow to max size and then cost very little thereafter
|
||||||
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
|
send_buf_extract.resize(Nsimd);
|
||||||
|
recv_buf_extract.resize(Nsimd);
|
||||||
|
for(int l=0;l<Nsimd;l++){
|
||||||
|
if( send_buf_extract[l].size() < buffer_size) {
|
||||||
|
send_buf_extract[l].resize(buffer_size);
|
||||||
|
recv_buf_extract[l].resize(buffer_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pointers.resize(Nsimd);
|
||||||
|
rpointers.resize(Nsimd);
|
||||||
|
|
||||||
|
int bytes = buffer_size*sizeof(scalar_object);
|
||||||
|
|
||||||
|
buftime+=usecond();
|
||||||
|
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// Work out what to send where
|
// Work out what to send where
|
||||||
@ -266,7 +589,9 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
|
|
||||||
|
gathermtime-=usecond();
|
||||||
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
|
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
|
||||||
|
gathermtime+=usecond();
|
||||||
|
|
||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
|
||||||
@ -293,11 +618,13 @@ namespace Grid {
|
|||||||
|
|
||||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
|
commstime-=usecond();
|
||||||
_grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
|
_grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
|
||||||
xmit_to_rank,
|
xmit_to_rank,
|
||||||
(void *)&recv_buf_extract[i][0],
|
(void *)&recv_buf_extract[i][0],
|
||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
|
commstime+=usecond();
|
||||||
|
|
||||||
rpointers[i] = &recv_buf_extract[i][0];
|
rpointers[i] = &recv_buf_extract[i][0];
|
||||||
|
|
||||||
@ -307,11 +634,13 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Here we don't want to scatter, just place into a buffer.
|
// Here we don't want to scatter, just place into a buffer.
|
||||||
|
mergetime-=usecond();
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
for(int i=0;i<buffer_size;i++){
|
for(int i=0;i<buffer_size;i++){
|
||||||
assert(u_comm_offset+i<_unified_buffer_size);
|
// assert(u_comm_offset+i<_unified_buffer_size);
|
||||||
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
|
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
|
||||||
}
|
}
|
||||||
|
mergetime+=usecond();
|
||||||
u_comm_offset+=buffer_size;
|
u_comm_offset+=buffer_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,11 +8,12 @@
|
|||||||
#include <tensors/Tensor_outer.h>
|
#include <tensors/Tensor_outer.h>
|
||||||
#include <tensors/Tensor_transpose.h>
|
#include <tensors/Tensor_transpose.h>
|
||||||
#include <tensors/Tensor_trace.h>
|
#include <tensors/Tensor_trace.h>
|
||||||
|
#include <tensors/Tensor_index.h>
|
||||||
#include <tensors/Tensor_Ta.h>
|
#include <tensors/Tensor_Ta.h>
|
||||||
#include <tensors/Tensor_determinant.h>
|
#include <tensors/Tensor_determinant.h>
|
||||||
#include <tensors/Tensor_exp.h>
|
#include <tensors/Tensor_exp.h>
|
||||||
#include <tensors/Tensor_peek.h>
|
//#include <tensors/Tensor_peek.h>
|
||||||
#include <tensors/Tensor_poke.h>
|
//#include <tensors/Tensor_poke.h>
|
||||||
#include <tensors/Tensor_reality.h>
|
#include <tensors/Tensor_reality.h>
|
||||||
#include <tensors/Tensor_unary.h>
|
#include <tensors/Tensor_unary.h>
|
||||||
#include <tensors/Tensor_extract_merge.h>
|
#include <tensors/Tensor_extract_merge.h>
|
||||||
|
@ -24,7 +24,16 @@ namespace Grid {
|
|||||||
class GridThread {
|
class GridThread {
|
||||||
public:
|
public:
|
||||||
static int _threads;
|
static int _threads;
|
||||||
|
static int _hyperthreads;
|
||||||
|
static int _cores;
|
||||||
|
|
||||||
|
static void SetCores(int cr) {
|
||||||
|
#ifdef GRID_OMP
|
||||||
|
_cores = cr;
|
||||||
|
#else
|
||||||
|
_cores = 1;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
static void SetThreads(int thr) {
|
static void SetThreads(int thr) {
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
_threads = MIN(thr,omp_get_max_threads()) ;
|
_threads = MIN(thr,omp_get_max_threads()) ;
|
||||||
@ -35,22 +44,28 @@ class GridThread {
|
|||||||
};
|
};
|
||||||
static void SetMaxThreads(void) {
|
static void SetMaxThreads(void) {
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
|
// setenv("KMP_AFFINITY","balanced",1);
|
||||||
_threads = omp_get_max_threads();
|
_threads = omp_get_max_threads();
|
||||||
omp_set_num_threads(_threads);
|
omp_set_num_threads(_threads);
|
||||||
#else
|
#else
|
||||||
_threads = 1;
|
_threads = 1;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
static int GetHyperThreads(void) { assert(_threads%_cores ==0); return _threads/_cores; };
|
||||||
|
static int GetCores(void) { return _cores; };
|
||||||
static int GetThreads(void) { return _threads; };
|
static int GetThreads(void) { return _threads; };
|
||||||
static int SumArraySize(void) {return _threads;};
|
static int SumArraySize(void) {return _threads;};
|
||||||
|
|
||||||
static void GetWork(int nwork, int me, int & mywork, int & myoff){
|
static void GetWork(int nwork, int me, int & mywork, int & myoff){
|
||||||
int basework = nwork/_threads;
|
GetWork(nwork,me,mywork,myoff,_threads);
|
||||||
int backfill = _threads-(nwork%_threads);
|
}
|
||||||
if ( me >= _threads ) {
|
static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){
|
||||||
|
int basework = nwork/units;
|
||||||
|
int backfill = units-(nwork%units);
|
||||||
|
if ( me >= units ) {
|
||||||
mywork = myoff = 0;
|
mywork = myoff = 0;
|
||||||
} else {
|
} else {
|
||||||
mywork = (nwork+me)/_threads;
|
mywork = (nwork+me)/units;
|
||||||
myoff = basework * me;
|
myoff = basework * me;
|
||||||
if ( me > backfill )
|
if ( me > backfill )
|
||||||
myoff+= (me-backfill);
|
myoff+= (me-backfill);
|
||||||
|
52
lib/Timer.h
Normal file
52
lib/Timer.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#ifndef GRID_TIME_H
|
||||||
|
#define GRID_TIME_H
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <ctime>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
|
// Dress the output; use std::chrono
|
||||||
|
|
||||||
|
// C++11 time facilities better?
|
||||||
|
double usecond(void);
|
||||||
|
|
||||||
|
typedef std::chrono::system_clock          GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::milliseconds          GridTime;

// Accumulating stopwatch.
//
// Start()/Stop() pairs add the elapsed interval to an internal total;
// Elapsed() reports that total in milliseconds (GridTime) and Reset()
// clears it. Start() asserts the watch is stopped, Stop() asserts it is
// running, and Elapsed() asserts it is stopped.
class GridStopWatch {
private:
  bool running;
  GridTimePoint start;
  // Kept at the clock's native resolution: rounding every interval to whole
  // milliseconds before adding would make many sub-millisecond intervals
  // sum to zero.
  GridClock::duration accumulator;
public:
  GridStopWatch () {
    Reset();
  }
  // Begins timing an interval.
  void Start(void) {
    assert(running == false);
    start = GridClock::now();
    running = true;
  }
  // Ends the current interval and adds it to the running total.
  void Stop(void) {
    assert(running == true);
    accumulator += GridClock::now()-start;
    running = false;
  }
  // Stops the watch and clears the accumulated time.
  void Reset(void){
    running = false;
    start = GridClock::now();
    accumulator = GridClock::duration::zero();
  }
  // Total accumulated time, truncated to whole milliseconds.
  GridTime Elapsed(void) const {
    assert(running == false);
    return std::chrono::duration_cast<GridTime>(accumulator);
  }
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -12,9 +12,6 @@ namespace Grid {
|
|||||||
std::vector<int> directions ;
|
std::vector<int> directions ;
|
||||||
std::vector<int> displacements;
|
std::vector<int> displacements;
|
||||||
|
|
||||||
// FIXME -- don't like xposing the operator directions
|
|
||||||
// as different to the geometrical dirs
|
|
||||||
// Also don't like special casing five dim.. should pass an object in template
|
|
||||||
Geometry(int _d) {
|
Geometry(int _d) {
|
||||||
|
|
||||||
int base = (_d==5) ? 1:0;
|
int base = (_d==5) ? 1:0;
|
||||||
@ -35,12 +32,12 @@ namespace Grid {
|
|||||||
displacements[2*_d]=0;
|
displacements[2*_d]=0;
|
||||||
|
|
||||||
//// report back
|
//// report back
|
||||||
std::cout<<"directions :";
|
std::cout<<GridLogMessage<<"directions :";
|
||||||
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
|
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
|
||||||
std::cout <<std::endl;
|
std::cout <<std::endl;
|
||||||
std::cout<<"displacements :";
|
std::cout<<GridLogMessage<<"displacements :";
|
||||||
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
|
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
|
||||||
std::cout <<std::endl;
|
std::cout<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -64,6 +61,97 @@ namespace Grid {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis>
|
||||||
|
class Aggregation {
|
||||||
|
public:
|
||||||
|
typedef iVector<CComplex,nbasis > siteVector;
|
||||||
|
typedef Lattice<siteVector> CoarseVector;
|
||||||
|
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||||
|
|
||||||
|
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||||
|
typedef Lattice<Fobj > FineField;
|
||||||
|
|
||||||
|
GridBase *CoarseGrid;
|
||||||
|
GridBase *FineGrid;
|
||||||
|
std::vector<Lattice<Fobj> > subspace;
|
||||||
|
|
||||||
|
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :
|
||||||
|
CoarseGrid(_CoarseGrid),
|
||||||
|
FineGrid(_FineGrid),
|
||||||
|
subspace(nbasis,_FineGrid)
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
void Orthogonalise(void){
|
||||||
|
CoarseScalar InnerProd(CoarseGrid);
|
||||||
|
blockOrthogonalise(InnerProd,subspace);
|
||||||
|
}
|
||||||
|
void CheckOrthogonal(void){
|
||||||
|
CoarseVector iProj(CoarseGrid);
|
||||||
|
CoarseVector eProj(CoarseGrid);
|
||||||
|
Lattice<CComplex> pokey(CoarseGrid);
|
||||||
|
|
||||||
|
|
||||||
|
for(int i=0;i<nbasis;i++){
|
||||||
|
blockProject(iProj,subspace[i],subspace);
|
||||||
|
|
||||||
|
eProj=zero;
|
||||||
|
for(int ss=0;ss<CoarseGrid->oSites();ss++){
|
||||||
|
eProj._odata[ss](i)=CComplex(1.0);
|
||||||
|
}
|
||||||
|
eProj=eProj - iProj;
|
||||||
|
std::cout<<GridLogMessage<<"Orthog check error "<<i<<" " << norm2(eProj)<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout<<GridLogMessage <<"CheckOrthog done"<<std::endl;
|
||||||
|
}
|
||||||
|
void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
|
||||||
|
blockProject(CoarseVec,FineVec,subspace);
|
||||||
|
}
|
||||||
|
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
|
||||||
|
blockPromote(CoarseVec,FineVec,subspace);
|
||||||
|
}
|
||||||
|
void CreateSubspaceRandom(GridParallelRNG &RNG){
|
||||||
|
for(int i=0;i<nbasis;i++){
|
||||||
|
random(RNG,subspace[i]);
|
||||||
|
std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
|
||||||
|
}
|
||||||
|
Orthogonalise();
|
||||||
|
}
|
||||||
|
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
||||||
|
|
||||||
|
RealD scale;
|
||||||
|
|
||||||
|
ConjugateGradient<FineField> CG(1.0e-2,10000);
|
||||||
|
FineField noise(FineGrid);
|
||||||
|
FineField Mn(FineGrid);
|
||||||
|
|
||||||
|
for(int b=0;b<nn;b++){
|
||||||
|
|
||||||
|
gaussian(RNG,noise);
|
||||||
|
scale = std::pow(norm2(noise),-0.5);
|
||||||
|
noise=noise*scale;
|
||||||
|
|
||||||
|
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
||||||
|
|
||||||
|
for(int i=0;i<1;i++){
|
||||||
|
|
||||||
|
CG(hermop,noise,subspace[b]);
|
||||||
|
|
||||||
|
noise = subspace[b];
|
||||||
|
scale = std::pow(norm2(noise),-0.5);
|
||||||
|
noise=noise*scale;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl;
|
||||||
|
subspace[b] = noise;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
Orthogonalise();
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
// Fine Object == (per site) type of fine field
|
// Fine Object == (per site) type of fine field
|
||||||
// nbasis == number of deflation vectors
|
// nbasis == number of deflation vectors
|
||||||
template<class Fobj,class CComplex,int nbasis>
|
template<class Fobj,class CComplex,int nbasis>
|
||||||
@ -82,7 +170,7 @@ namespace Grid {
|
|||||||
////////////////////
|
////////////////////
|
||||||
Geometry geom;
|
Geometry geom;
|
||||||
GridBase * _grid;
|
GridBase * _grid;
|
||||||
CartesianStencil Stencil;
|
CartesianStencil<siteVector,siteVector,SimpleCompressor<siteVector> > Stencil;
|
||||||
|
|
||||||
std::vector<CoarseMatrix> A;
|
std::vector<CoarseMatrix> A;
|
||||||
|
|
||||||
@ -101,24 +189,22 @@ namespace Grid {
|
|||||||
SimpleCompressor<siteVector> compressor;
|
SimpleCompressor<siteVector> compressor;
|
||||||
Stencil.HaloExchange(in,comm_buf,compressor);
|
Stencil.HaloExchange(in,comm_buf,compressor);
|
||||||
|
|
||||||
//PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<Grid()->oSites();ss++){
|
for(int ss=0;ss<Grid()->oSites();ss++){
|
||||||
siteVector res = zero;
|
siteVector res = zero;
|
||||||
siteVector nbr;
|
siteVector nbr;
|
||||||
int offset,local,perm,ptype;
|
int ptype;
|
||||||
|
StencilEntry *SE;
|
||||||
for(int point=0;point<geom.npoint;point++){
|
for(int point=0;point<geom.npoint;point++){
|
||||||
offset = Stencil._offsets [point][ss];
|
|
||||||
local = Stencil._is_local[point][ss];
|
SE=Stencil.GetEntry(ptype,point,ss);
|
||||||
perm = Stencil._permute [point][ss];
|
|
||||||
ptype = Stencil._permute_type[point];
|
|
||||||
|
|
||||||
if(local&&perm) {
|
if(SE->_is_local&&SE->_permute) {
|
||||||
permute(nbr,in._odata[offset],ptype);
|
permute(nbr,in._odata[SE->_offset],ptype);
|
||||||
} else if(local) {
|
} else if(SE->_is_local) {
|
||||||
nbr = in._odata[offset];
|
nbr = in._odata[SE->_offset];
|
||||||
} else {
|
} else {
|
||||||
nbr = comm_buf[offset];
|
nbr = comm_buf[SE->_offset];
|
||||||
}
|
}
|
||||||
res = res + A[point]._odata[ss]*nbr;
|
res = res + A[point]._odata[ss]*nbr;
|
||||||
}
|
}
|
||||||
@ -145,7 +231,8 @@ namespace Grid {
|
|||||||
comm_buf.resize(Stencil._unified_buffer_size);
|
comm_buf.resize(Stencil._unified_buffer_size);
|
||||||
};
|
};
|
||||||
|
|
||||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,std::vector<Lattice<Fobj> > & subspace){
|
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||||
|
Aggregation<Fobj,CComplex,nbasis> & Subspace){
|
||||||
|
|
||||||
FineField iblock(FineGrid); // contributions from within this block
|
FineField iblock(FineGrid); // contributions from within this block
|
||||||
FineField oblock(FineGrid); // contributions from outwith this block
|
FineField oblock(FineGrid); // contributions from outwith this block
|
||||||
@ -162,8 +249,7 @@ namespace Grid {
|
|||||||
CoarseScalar InnerProd(Grid());
|
CoarseScalar InnerProd(Grid());
|
||||||
|
|
||||||
// Orthogonalise the subblocks over the basis
|
// Orthogonalise the subblocks over the basis
|
||||||
blockOrthogonalise(InnerProd,subspace);
|
blockOrthogonalise(InnerProd,Subspace.subspace);
|
||||||
blockProject(iProj,subspace[0],subspace);
|
|
||||||
|
|
||||||
// Compute the matrix elements of linop between this orthonormal
|
// Compute the matrix elements of linop between this orthonormal
|
||||||
// set of vectors.
|
// set of vectors.
|
||||||
@ -177,7 +263,10 @@ namespace Grid {
|
|||||||
assert(self_stencil!=-1);
|
assert(self_stencil!=-1);
|
||||||
|
|
||||||
for(int i=0;i<nbasis;i++){
|
for(int i=0;i<nbasis;i++){
|
||||||
phi=subspace[i];
|
phi=Subspace.subspace[i];
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
|
||||||
|
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
|
|
||||||
int dir = geom.directions[p];
|
int dir = geom.directions[p];
|
||||||
@ -210,8 +299,11 @@ namespace Grid {
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
blockProject(iProj,iblock,subspace);
|
Subspace.ProjectToSubspace(iProj,iblock);
|
||||||
blockProject(oProj,oblock,subspace);
|
Subspace.ProjectToSubspace(oProj,oblock);
|
||||||
|
// blockProject(iProj,iblock,Subspace.subspace);
|
||||||
|
// blockProject(oProj,oblock,Subspace.subspace);
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<Grid()->oSites();ss++){
|
for(int ss=0;ss<Grid()->oSites();ss++){
|
||||||
for(int j=0;j<nbasis;j++){
|
for(int j=0;j<nbasis;j++){
|
||||||
if( disp!= 0 ) {
|
if( disp!= 0 ) {
|
||||||
@ -227,33 +319,33 @@ namespace Grid {
|
|||||||
///////////////////////////
|
///////////////////////////
|
||||||
// test code worth preserving in if block
|
// test code worth preserving in if block
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
std::cout<< " Computed matrix elements "<< self_stencil <<std::endl;
|
std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
std::cout<< "A["<<p<<"]" << std::endl;
|
std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
|
||||||
std::cout<< A[p] << std::endl;
|
std::cout<<GridLogMessage<< A[p] << std::endl;
|
||||||
}
|
}
|
||||||
std::cout<< " picking by block0 "<< self_stencil <<std::endl;
|
std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
|
||||||
|
|
||||||
phi=subspace[0];
|
phi=Subspace.subspace[0];
|
||||||
std::vector<int> bc(FineGrid->_ndimension,0);
|
std::vector<int> bc(FineGrid->_ndimension,0);
|
||||||
|
|
||||||
blockPick(Grid(),phi,tmp,bc); // Pick out a block
|
blockPick(Grid(),phi,tmp,bc); // Pick out a block
|
||||||
linop.Op(tmp,Mphi); // Apply big dop
|
linop.Op(tmp,Mphi); // Apply big dop
|
||||||
blockProject(iProj,Mphi,subspace); // project it and print it
|
blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
|
||||||
std::cout<< " Computed matrix elements from block zero only "<<std::endl;
|
std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
|
||||||
std::cout<< iProj <<std::endl;
|
std::cout<<GridLogMessage<< iProj <<std::endl;
|
||||||
std::cout<<"Computed Coarse Operator"<<std::endl;
|
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
// AssertHermitian();
|
|
||||||
// ForceHermitian();
|
// ForceHermitian();
|
||||||
// ForceDiagonal();
|
AssertHermitian();
|
||||||
|
// ForceDiagonal();
|
||||||
}
|
}
|
||||||
void ForceDiagonal(void) {
|
void ForceDiagonal(void) {
|
||||||
|
|
||||||
|
|
||||||
std::cout<<"**************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
|
||||||
std::cout<<"**** Forcing coarse operator to be diagonal ****"<<std::endl;
|
std::cout<<GridLogMessage<<"**** Forcing coarse operator to be diagonal ****"<<std::endl;
|
||||||
std::cout<<"**************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
|
||||||
for(int p=0;p<8;p++){
|
for(int p=0;p<8;p++){
|
||||||
A[p]=zero;
|
A[p]=zero;
|
||||||
}
|
}
|
||||||
@ -263,7 +355,7 @@ namespace Grid {
|
|||||||
|
|
||||||
Complex one(1.0);
|
Complex one(1.0);
|
||||||
|
|
||||||
iMatrix<Complex,nbasis> ident; ident=one;
|
iMatrix<CComplex,nbasis> ident; ident=one;
|
||||||
|
|
||||||
val = val*adj(val);
|
val = val*adj(val);
|
||||||
val = val + 1.0;
|
val = val + 1.0;
|
||||||
@ -279,7 +371,7 @@ namespace Grid {
|
|||||||
int dd=d+1;
|
int dd=d+1;
|
||||||
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
|
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
|
||||||
}
|
}
|
||||||
A[8] = 0.5*(A[8] + adj(A[8]));
|
// A[8] = 0.5*(A[8] + adj(A[8]));
|
||||||
}
|
}
|
||||||
void AssertHermitian(void) {
|
void AssertHermitian(void) {
|
||||||
CoarseMatrix AA (Grid());
|
CoarseMatrix AA (Grid());
|
||||||
@ -293,13 +385,13 @@ namespace Grid {
|
|||||||
|
|
||||||
Diff = AA - adj(AAc);
|
Diff = AA - adj(AAc);
|
||||||
|
|
||||||
std::cout<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
|
std::cout<<GridLogMessage<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
|
||||||
std::cout<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
|
std::cout<<GridLogMessage<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
Diff = A[8] - adj(A[8]);
|
Diff = A[8] - adj(A[8]);
|
||||||
std::cout<<"Norm diff local "<< norm2(Diff)<<std::endl;
|
std::cout<<GridLogMessage<<"Norm diff local "<< norm2(Diff)<<std::endl;
|
||||||
std::cout<<"Norm local "<< norm2(A[8])<<std::endl;
|
std::cout<<GridLogMessage<<"Norm local "<< norm2(A[8])<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -71,6 +71,47 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// Construct herm op and shift it for mgrid smoother
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
template<class Matrix,class Field>
|
||||||
|
class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||||
|
Matrix &_Mat;
|
||||||
|
RealD _shift;
|
||||||
|
public:
|
||||||
|
ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
|
||||||
|
// Support for coarsening to a multigrid
|
||||||
|
void OpDiag (const Field &in, Field &out) {
|
||||||
|
_Mat.Mdiag(in,out);
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||||
|
_Mat.Mdir(in,out,dir,disp);
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void Op (const Field &in, Field &out){
|
||||||
|
_Mat.M(in,out);
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void AdjOp (const Field &in, Field &out){
|
||||||
|
_Mat.Mdag(in,out);
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
|
_Mat.MdagM(in,out,n1,n2);
|
||||||
|
out = out + _shift*in;
|
||||||
|
|
||||||
|
ComplexD dot;
|
||||||
|
dot= innerProduct(in,out);
|
||||||
|
n1=real(dot);
|
||||||
|
n2=norm2(out);
|
||||||
|
}
|
||||||
|
void HermOp(const Field &in, Field &out){
|
||||||
|
RealD n1,n2;
|
||||||
|
HermOpAndNorm(in,out,n1,n2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// Wrap an already herm matrix
|
// Wrap an already herm matrix
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
@ -147,6 +188,7 @@ namespace Grid {
|
|||||||
};
|
};
|
||||||
template<class Matrix,class Field>
|
template<class Matrix,class Field>
|
||||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||||
|
protected:
|
||||||
Matrix &_Mat;
|
Matrix &_Mat;
|
||||||
public:
|
public:
|
||||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||||
@ -173,6 +215,7 @@ namespace Grid {
|
|||||||
};
|
};
|
||||||
template<class Matrix,class Field>
|
template<class Matrix,class Field>
|
||||||
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
|
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
|
||||||
|
protected:
|
||||||
Matrix &_Mat;
|
Matrix &_Mat;
|
||||||
public:
|
public:
|
||||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||||
@ -199,6 +242,7 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Base classes for functions of operators
|
// Base classes for functions of operators
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
@ -207,6 +251,11 @@ namespace Grid {
|
|||||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Abstract interface for anything that maps an input field to an output
// field: implementations write their result into `out` given `in`.
template<class Field> class LinearFunction {
public:
  // Virtual destructor so that a derived object can be destroyed safely
  // through a LinearFunction pointer or reference.
  virtual ~LinearFunction() {}

  // Apply the function: read `in`, write the result to `out`.
  virtual void operator() (const Field &in, Field &out) = 0;
};
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Base classes for Multishift solvers for operators
|
// Base classes for Multishift solvers for operators
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
|
19
lib/algorithms/Preconditioner.h
Normal file
19
lib/algorithms/Preconditioner.h
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#ifndef GRID_PRECONDITIONER_H
|
||||||
|
#define GRID_PRECONDITIONER_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
// Abstract preconditioner: a LinearFunction that writes a preconditioned
// version of `src` into `psi`.
template<class Field> class Preconditioner : public LinearFunction<Field> {
public:
  // Declared public so a preconditioner can be applied through a
  // Preconditioner<Field> reference or pointer, not only through the
  // LinearFunction base.
  virtual void operator()(const Field &src, Field & psi)=0;
};
|
||||||
|
|
||||||
|
// Identity preconditioner: the output is a copy of the input.
template<class Field> class TrivialPrecon : public Preconditioner<Field> {
public:
  TrivialPrecon(void) {}

  // psi <- src, nothing else.
  void operator()(const Field &src, Field & psi) {
    psi = src;
  }
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -9,23 +9,34 @@ namespace Grid {
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Simple general polynomial with user supplied coefficients
|
// Simple general polynomial with user supplied coefficients
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<class Field>
|
||||||
|
class HermOpOperatorFunction : public OperatorFunction<Field> {
|
||||||
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||||
|
Linop.HermOp(in,out);
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
template<class Field>
|
template<class Field>
|
||||||
class Polynomial : public OperatorFunction<Field> {
|
class Polynomial : public OperatorFunction<Field> {
|
||||||
private:
|
private:
|
||||||
std::vector<double> Coeffs;
|
std::vector<RealD> Coeffs;
|
||||||
public:
|
public:
|
||||||
Polynomial(std::vector<double> &_Coeffs) : Coeffs(_Coeffs) {};
|
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
|
||||||
|
|
||||||
// Implement the required interface
|
// Implement the required interface
|
||||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||||
|
|
||||||
Field AtoN = in;
|
Field AtoN(in._grid);
|
||||||
|
Field Mtmp(in._grid);
|
||||||
|
AtoN = in;
|
||||||
out = AtoN*Coeffs[0];
|
out = AtoN*Coeffs[0];
|
||||||
|
// std::cout <<"Poly in " <<norm2(in)<<std::endl;
|
||||||
|
// std::cout <<"0 " <<norm2(out)<<std::endl;
|
||||||
for(int n=1;n<Coeffs.size();n++){
|
for(int n=1;n<Coeffs.size();n++){
|
||||||
Field Mtmp=AtoN;
|
Mtmp = AtoN;
|
||||||
Linop.Op(Mtmp,AtoN);
|
Linop.HermOp(Mtmp,AtoN);
|
||||||
out=out+AtoN*Coeffs[n];
|
out=out+AtoN*Coeffs[n];
|
||||||
|
// std::cout << n<<" " <<norm2(out)<<std::endl;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@ -36,21 +47,36 @@ namespace Grid {
|
|||||||
template<class Field>
|
template<class Field>
|
||||||
class Chebyshev : public OperatorFunction<Field> {
|
class Chebyshev : public OperatorFunction<Field> {
|
||||||
private:
|
private:
|
||||||
std::vector<double> Coeffs;
|
std::vector<RealD> Coeffs;
|
||||||
int order;
|
int order;
|
||||||
double hi;
|
RealD hi;
|
||||||
double lo;
|
RealD lo;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void csv(std::ostream &out){
|
void csv(std::ostream &out){
|
||||||
for (double x=lo; x<hi; x+=(hi-lo)/1000) {
|
for (RealD x=lo; x<hi; x+=(hi-lo)/1000) {
|
||||||
double f = approx(x);
|
RealD f = approx(x);
|
||||||
out<< x<<" "<<f<<std::endl;
|
out<< x<<" "<<f<<std::endl;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Chebyshev(double _lo,double _hi,int _order, double (* func)(double) ){
|
// Convenience for plotting the approximation
|
||||||
|
void PlotApprox(std::ostream &out) {
|
||||||
|
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
|
||||||
|
for(RealD x=lo;x<hi;x+=(hi-lo)/50.0){
|
||||||
|
out <<x<<"\t"<<approx(x)<<std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Chebyshev(){};
|
||||||
|
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
||||||
|
{
|
||||||
lo=_lo;
|
lo=_lo;
|
||||||
hi=_hi;
|
hi=_hi;
|
||||||
order=_order;
|
order=_order;
|
||||||
@ -58,29 +84,58 @@ namespace Grid {
|
|||||||
if(order < 2) exit(-1);
|
if(order < 2) exit(-1);
|
||||||
Coeffs.resize(order);
|
Coeffs.resize(order);
|
||||||
for(int j=0;j<order;j++){
|
for(int j=0;j<order;j++){
|
||||||
double s=0;
|
RealD s=0;
|
||||||
for(int k=0;k<order;k++){
|
for(int k=0;k<order;k++){
|
||||||
double y=std::cos(M_PI*(k+0.5)/order);
|
RealD y=std::cos(M_PI*(k+0.5)/order);
|
||||||
double x=0.5*(y*(hi-lo)+(hi+lo));
|
RealD x=0.5*(y*(hi-lo)+(hi+lo));
|
||||||
double f=func(x);
|
RealD f=func(x);
|
||||||
s=s+f*std::cos( j*M_PI*(k+0.5)/order );
|
s=s+f*std::cos( j*M_PI*(k+0.5)/order );
|
||||||
}
|
}
|
||||||
Coeffs[j] = s * 2.0/order;
|
Coeffs[j] = s * 2.0/order;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
double approx(double x) // Convenience for plotting the approximation
|
|
||||||
|
void JacksonSmooth(void){
|
||||||
|
RealD M=order;
|
||||||
|
RealD alpha = M_PI/(M+2);
|
||||||
|
RealD lmax = std::cos(alpha);
|
||||||
|
RealD sumUsq =0;
|
||||||
|
std::vector<RealD> U(M);
|
||||||
|
std::vector<RealD> a(M);
|
||||||
|
std::vector<RealD> g(M);
|
||||||
|
for(int n=0;n<=M;n++){
|
||||||
|
U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax));
|
||||||
|
sumUsq += U[n]*U[n];
|
||||||
|
}
|
||||||
|
sumUsq = std::sqrt(sumUsq);
|
||||||
|
|
||||||
|
for(int i=1;i<=M;i++){
|
||||||
|
a[i] = U[i]/sumUsq;
|
||||||
|
}
|
||||||
|
g[0] = 1.0;
|
||||||
|
for(int m=1;m<=M;m++){
|
||||||
|
g[m] = 0;
|
||||||
|
for(int i=0;i<=M-m;i++){
|
||||||
|
g[m]+= a[i]*a[m+i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(int m=1;m<=M;m++){
|
||||||
|
Coeffs[m]*=g[m];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RealD approx(RealD x) // Convenience for plotting the approximation
|
||||||
{
|
{
|
||||||
double Tn;
|
RealD Tn;
|
||||||
double Tnm;
|
RealD Tnm;
|
||||||
double Tnp;
|
RealD Tnp;
|
||||||
|
|
||||||
double y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||||
|
|
||||||
double T0=1;
|
RealD T0=1;
|
||||||
double T1=y;
|
RealD T1=y;
|
||||||
|
|
||||||
double sum;
|
RealD sum;
|
||||||
sum = 0.5*Coeffs[0]*T0;
|
sum = 0.5*Coeffs[0]*T0;
|
||||||
sum+= Coeffs[1]*T1;
|
sum+= Coeffs[1]*T1;
|
||||||
|
|
||||||
@ -95,46 +150,38 @@ namespace Grid {
|
|||||||
return sum;
|
return sum;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Convenience for plotting the approximation
|
// Implement the required interface
|
||||||
void PlotApprox(std::ostream &out) {
|
|
||||||
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
|
|
||||||
for(double x=lo;x<hi;x+=(hi-lo)/50.0){
|
|
||||||
out <<x<<"\t"<<approx(x)<<std::endl;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Implement the required interface; could require Lattice base class
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||||
|
|
||||||
Field T0 = in;
|
GridBase *grid=in._grid;
|
||||||
Field T1 = T0; // Field T1(T0._grid); more efficient but hardwires Lattice class
|
|
||||||
Field T2 = T1;
|
int vol=grid->gSites();
|
||||||
|
|
||||||
|
Field T0(grid); T0 = in;
|
||||||
|
Field T1(grid);
|
||||||
|
Field T2(grid);
|
||||||
|
Field y(grid);
|
||||||
|
|
||||||
// use a pointer trick to eliminate copies
|
|
||||||
Field *Tnm = &T0;
|
Field *Tnm = &T0;
|
||||||
Field *Tn = &T1;
|
Field *Tn = &T1;
|
||||||
Field *Tnp = &T2;
|
Field *Tnp = &T2;
|
||||||
Field y = in;
|
|
||||||
|
|
||||||
double xscale = 2.0/(hi-lo);
|
|
||||||
double mscale = -(hi+lo)/(hi-lo);
|
|
||||||
|
|
||||||
// Tn=T1 = (xscale M + mscale)in
|
// Tn=T1 = (xscale M + mscale)in
|
||||||
Linop.Op(T0,y);
|
RealD xscale = 2.0/(hi-lo);
|
||||||
|
RealD mscale = -(hi+lo)/(hi-lo);
|
||||||
|
Linop.HermOp(T0,y);
|
||||||
T1=y*xscale+in*mscale;
|
T1=y*xscale+in*mscale;
|
||||||
|
|
||||||
// sum = .5 c[0] T0 + c[1] T1
|
// sum = .5 c[0] T0 + c[1] T1
|
||||||
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
||||||
|
|
||||||
for(int n=2;n<order;n++){
|
for(int n=2;n<order;n++){
|
||||||
|
|
||||||
Linop.Op(*Tn,y);
|
Linop.HermOp(*Tn,y);
|
||||||
|
|
||||||
y=xscale*y+mscale*(*Tn);
|
y=xscale*y+mscale*(*Tn);
|
||||||
|
|
||||||
*Tnp=2.0*y-(*Tnm);
|
*Tnp=2.0*y-(*Tnm);
|
||||||
|
|
||||||
out=out+Coeffs[n]* (*Tnp);
|
out=out+Coeffs[n]* (*Tnp);
|
||||||
|
|
||||||
// Cycle pointers to avoid copies
|
// Cycle pointers to avoid copies
|
||||||
@ -148,5 +195,121 @@ namespace Grid {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class ChebyshevLanczos : public Chebyshev<Field> {
|
||||||
|
private:
|
||||||
|
std::vector<RealD> Coeffs;
|
||||||
|
int order;
|
||||||
|
RealD alpha;
|
||||||
|
RealD beta;
|
||||||
|
RealD mu;
|
||||||
|
|
||||||
|
public:
|
||||||
|
ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) :
|
||||||
|
alpha(_alpha),
|
||||||
|
beta(_beta),
|
||||||
|
mu(_mu)
|
||||||
|
{
|
||||||
|
order=_order;
|
||||||
|
Coeffs.resize(order);
|
||||||
|
for(int i=0;i<_order;i++){
|
||||||
|
Coeffs[i] = 0.0;
|
||||||
|
}
|
||||||
|
Coeffs[order-1]=1.0;
|
||||||
|
};
|
||||||
|
|
||||||
|
void csv(std::ostream &out){
|
||||||
|
for (RealD x=-1.2*alpha; x<1.2*alpha; x+=(2.0*alpha)/10000) {
|
||||||
|
RealD f = approx(x);
|
||||||
|
out<< x<<" "<<f<<std::endl;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD approx(RealD xx) // Convenience for plotting the approximation
|
||||||
|
{
|
||||||
|
RealD Tn;
|
||||||
|
RealD Tnm;
|
||||||
|
RealD Tnp;
|
||||||
|
Real aa = alpha * alpha;
|
||||||
|
Real bb = beta * beta;
|
||||||
|
|
||||||
|
RealD x = ( 2.0 * (xx-mu)*(xx-mu) - (aa+bb) ) / (aa-bb);
|
||||||
|
|
||||||
|
RealD y= x;
|
||||||
|
|
||||||
|
RealD T0=1;
|
||||||
|
RealD T1=y;
|
||||||
|
|
||||||
|
RealD sum;
|
||||||
|
sum = 0.5*Coeffs[0]*T0;
|
||||||
|
sum+= Coeffs[1]*T1;
|
||||||
|
|
||||||
|
Tn =T1;
|
||||||
|
Tnm=T0;
|
||||||
|
for(int i=2;i<order;i++){
|
||||||
|
Tnp=2*y*Tn-Tnm;
|
||||||
|
Tnm=Tn;
|
||||||
|
Tn =Tnp;
|
||||||
|
sum+= Tn*Coeffs[i];
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
};
|
||||||
|
|
||||||
|
// shift_Multiply in Rudy's code
|
||||||
|
void AminusMuSq(LinearOperatorBase<Field> &Linop, const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
GridBase *grid=in._grid;
|
||||||
|
Field tmp(grid);
|
||||||
|
|
||||||
|
RealD aa= alpha*alpha;
|
||||||
|
RealD bb= beta * beta;
|
||||||
|
|
||||||
|
Linop.HermOp(in,out);
|
||||||
|
out = out - mu*in;
|
||||||
|
|
||||||
|
Linop.HermOp(out,tmp);
|
||||||
|
tmp = tmp - mu * out;
|
||||||
|
|
||||||
|
out = (2.0/ (aa-bb) ) * tmp - ((aa+bb)/(aa-bb))*in;
|
||||||
|
};
|
||||||
|
// Implement the required interface
|
||||||
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||||
|
|
||||||
|
GridBase *grid=in._grid;
|
||||||
|
|
||||||
|
int vol=grid->gSites();
|
||||||
|
|
||||||
|
Field T0(grid); T0 = in;
|
||||||
|
Field T1(grid);
|
||||||
|
Field T2(grid);
|
||||||
|
Field y(grid);
|
||||||
|
|
||||||
|
Field *Tnm = &T0;
|
||||||
|
Field *Tn = &T1;
|
||||||
|
Field *Tnp = &T2;
|
||||||
|
|
||||||
|
// Tn=T1 = (xscale M )*in
|
||||||
|
AminusMuSq(Linop,T0,T1);
|
||||||
|
|
||||||
|
// sum = .5 c[0] T0 + c[1] T1
|
||||||
|
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
||||||
|
for(int n=2;n<order;n++){
|
||||||
|
|
||||||
|
AminusMuSq(Linop,*Tn,y);
|
||||||
|
|
||||||
|
*Tnp=2.0*y-(*Tnm);
|
||||||
|
|
||||||
|
out=out+Coeffs[n]* (*Tnp);
|
||||||
|
|
||||||
|
// Cycle pointers to avoid copies
|
||||||
|
Field *swizzle = Tnm;
|
||||||
|
Tnm =Tn;
|
||||||
|
Tn =Tnp;
|
||||||
|
Tnp =swizzle;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#ifndef MULTI_SHIFT_FUNCTION
|
#ifndef MULTI_SHIFT_FUNCTION
|
||||||
#define MULTI_SHIFT_FUNCTION
|
#define MULTI_SHIFT_FUNCTION
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
class MultiShiftFunction {
|
class MultiShiftFunction {
|
||||||
public:
|
public:
|
||||||
int order;
|
int order;
|
||||||
@ -9,20 +11,29 @@ public:
|
|||||||
std::vector<RealD> tolerances;
|
std::vector<RealD> tolerances;
|
||||||
RealD norm;
|
RealD norm;
|
||||||
RealD lo,hi;
|
RealD lo,hi;
|
||||||
|
|
||||||
MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;};
|
MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;};
|
||||||
RealD approx(RealD x);
|
RealD approx(RealD x);
|
||||||
void csv(std::ostream &out);
|
void csv(std::ostream &out);
|
||||||
void gnuplot(std::ostream &out);
|
void gnuplot(std::ostream &out);
|
||||||
MultiShiftFunction(AlgRemez & remez,double tol,bool inverse) :
|
|
||||||
order(remez.getDegree()),
|
void Init(AlgRemez & remez,double tol,bool inverse)
|
||||||
tolerances(remez.getDegree(),tol),
|
|
||||||
poles(remez.getDegree()),
|
|
||||||
residues(remez.getDegree())
|
|
||||||
{
|
{
|
||||||
|
order=remez.getDegree();
|
||||||
|
tolerances.resize(remez.getDegree(),tol);
|
||||||
|
poles.resize(remez.getDegree());
|
||||||
|
residues.resize(remez.getDegree());
|
||||||
remez.getBounds(lo,hi);
|
remez.getBounds(lo,hi);
|
||||||
if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm);
|
if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm);
|
||||||
else remez.getPFE (&residues[0],&poles[0],&norm);
|
else remez.getPFE (&residues[0],&poles[0],&norm);
|
||||||
}
|
}
|
||||||
|
// Allow deferred initialisation
|
||||||
|
MultiShiftFunction(void){};
|
||||||
|
MultiShiftFunction(AlgRemez & remez,double tol,bool inverse)
|
||||||
|
{
|
||||||
|
Init(remez,tol,inverse);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -758,3 +758,4 @@ void AlgRemez::csv(std::ostream & os)
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,10 @@
|
|||||||
#ifndef INCLUDED_ALG_REMEZ_H
|
#ifndef INCLUDED_ALG_REMEZ_H
|
||||||
#define INCLUDED_ALG_REMEZ_H
|
#define INCLUDED_ALG_REMEZ_H
|
||||||
|
|
||||||
#include <algorithms/approx/bigfloat.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
|
//#include <algorithms/approx/bigfloat.h>
|
||||||
|
#include <algorithms/approx/bigfloat_double.h>
|
||||||
|
|
||||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||||
@ -28,6 +31,7 @@
|
|||||||
remez.getIPFE(res,pole,&norm);
|
remez.getIPFE(res,pole,&norm);
|
||||||
remez.csv(ostream &os);
|
remez.csv(ostream &os);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class AlgRemez
|
class AlgRemez
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
370
lib/algorithms/iterative/AdefGeneric.h
Normal file
370
lib/algorithms/iterative/AdefGeneric.h
Normal file
@ -0,0 +1,370 @@
|
|||||||
|
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||||
|
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv.
|
||||||
|
* Script A = SolverMatrix
|
||||||
|
* Script P = Preconditioner
|
||||||
|
*
|
||||||
|
* Deflation methods considered
|
||||||
|
* -- Solve P A x = P b [ like Luscher ]
|
||||||
|
* DEF-1 M P A x = M P b [i.e. left precon]
|
||||||
|
* DEF-2 P^T M A x = P^T M b
|
||||||
|
* ADEF-1 Preconditioner = M P + Q [ Q + M + M A Q]
|
||||||
|
* ADEF-2 Preconditioner = P^T M + Q
|
||||||
|
* BNN Preconditioner = P^T M P + Q
|
||||||
|
* BNN2 Preconditioner = M P + P^TM +Q - M P A M
|
||||||
|
*
|
||||||
|
* Implement ADEF-2
|
||||||
|
*
|
||||||
|
* Vstart = P^Tx + Qb
|
||||||
|
* M1 = P^TM + Q
|
||||||
|
* M2=M3=1
|
||||||
|
* Vout = x
|
||||||
|
*/
|
||||||
|
|
||||||
|
// abstract base
|
||||||
|
template<class Field, class CoarseField>
|
||||||
|
class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
int verbose;
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxIterations;
|
||||||
|
const int mmax = 5;
|
||||||
|
GridBase *grid;
|
||||||
|
GridBase *coarsegrid;
|
||||||
|
|
||||||
|
LinearOperatorBase<Field> *_Linop
|
||||||
|
OperatorFunction<Field> *_Smoother,
|
||||||
|
LinearFunction<CoarseField> *_CoarseSolver;
|
||||||
|
|
||||||
|
// Need something that knows how to get from Coarse to fine and back again
|
||||||
|
|
||||||
|
// more most operator functions
|
||||||
|
TwoLevelFlexiblePcg(RealD tol,
|
||||||
|
Integer maxit,
|
||||||
|
LinearOperatorBase<Field> *Linop,
|
||||||
|
LinearOperatorBase<Field> *SmootherLinop,
|
||||||
|
OperatorFunction<Field> *Smoother,
|
||||||
|
OperatorFunction<CoarseField> CoarseLinop
|
||||||
|
) :
|
||||||
|
Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
_Linop(Linop),
|
||||||
|
_PreconditionerLinop(PrecLinop),
|
||||||
|
_Preconditioner(Preconditioner)
|
||||||
|
{
|
||||||
|
verbose=0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// The Pcg routine is common to all, but the various matrices differ from derived
|
||||||
|
// implementation to derived implementation
|
||||||
|
void operator() (const Field &src, Field &psi){
|
||||||
|
void operator() (const Field &src, Field &psi){
|
||||||
|
|
||||||
|
psi.checkerboard = src.checkerboard;
|
||||||
|
grid = src._grid;
|
||||||
|
|
||||||
|
RealD f;
|
||||||
|
RealD rtzp,rtz,a,d,b;
|
||||||
|
RealD rptzp;
|
||||||
|
RealD tn;
|
||||||
|
RealD guess = norm2(psi);
|
||||||
|
RealD ssq = norm2(src);
|
||||||
|
RealD rsq = ssq*Tolerance*Tolerance;
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// Set up history vectors
|
||||||
|
/////////////////////////////
|
||||||
|
std::vector<Field> p (mmax,grid);
|
||||||
|
std::vector<Field> mmp(mmax,grid);
|
||||||
|
std::vector<RealD> pAp(mmax);
|
||||||
|
|
||||||
|
Field x (grid); x = psi;
|
||||||
|
Field z (grid);
|
||||||
|
Field tmp(grid);
|
||||||
|
Field r (grid);
|
||||||
|
Field mu (grid);
|
||||||
|
|
||||||
|
//////////////////////////
|
||||||
|
// x0 = Vstart -- possibly modify guess
|
||||||
|
//////////////////////////
|
||||||
|
x=src;
|
||||||
|
Vstart(x,src);
|
||||||
|
|
||||||
|
// r0 = b -A x0
|
||||||
|
HermOp(x,mmp); // Shouldn't this be something else?
|
||||||
|
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
|
||||||
|
|
||||||
|
//////////////////////////////////
|
||||||
|
// Compute z = M1 x
|
||||||
|
//////////////////////////////////
|
||||||
|
M1(r,z,tmp,mp,SmootherMirs);
|
||||||
|
rtzp =real(innerProduct(r,z));
|
||||||
|
|
||||||
|
///////////////////////////////////////
|
||||||
|
// Solve for Mss mu = P A z and set p = z-mu
|
||||||
|
// Def2: p = 1 - Q Az = Pright z
|
||||||
|
// Other algos M2 is trivial
|
||||||
|
///////////////////////////////////////
|
||||||
|
M2(z,p[0]);
|
||||||
|
|
||||||
|
for (int k=0;k<=MaxIterations;k++){
|
||||||
|
|
||||||
|
int peri_k = k % mmax;
|
||||||
|
int peri_kp = (k+1) % mmax;
|
||||||
|
|
||||||
|
rtz=rtzp;
|
||||||
|
d= M3(p[peri_k],mp,mmp[peri_k],tmp);
|
||||||
|
a = rtz/d;
|
||||||
|
|
||||||
|
// Memorise this
|
||||||
|
pAp[peri_k] = d;
|
||||||
|
|
||||||
|
axpy(x,a,p[peri_k],x);
|
||||||
|
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
|
||||||
|
|
||||||
|
// Compute z = M x
|
||||||
|
M1(r,z,tmp,mp);
|
||||||
|
|
||||||
|
rtzp =real(innerProduct(r,z));
|
||||||
|
|
||||||
|
M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
|
||||||
|
|
||||||
|
p[peri_kp]=p[peri_k];
|
||||||
|
|
||||||
|
// Standard search direction p -> z + b p ; b =
|
||||||
|
b = (rtzp)/rtz;
|
||||||
|
|
||||||
|
int northog;
|
||||||
|
// northog = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
|
||||||
|
northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
|
||||||
|
|
||||||
|
for(int back=0; back < northog; back++){
|
||||||
|
int peri_back = (k-back)%mmax;
|
||||||
|
RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
|
||||||
|
RealD beta = -pbApk/pAp[peri_back];
|
||||||
|
axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD rrn=sqrt(rn/ssq);
|
||||||
|
std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl;
|
||||||
|
|
||||||
|
// Stopping condition
|
||||||
|
if ( rn <= rsq ) {
|
||||||
|
|
||||||
|
HermOp(x,mmp); // Shouldn't this be something else?
|
||||||
|
axpy(tmp,-1.0,src,mmp[0]);
|
||||||
|
|
||||||
|
RealD psinorm = sqrt(norm2(x));
|
||||||
|
RealD srcnorm = sqrt(norm2(src));
|
||||||
|
RealD tmpnorm = sqrt(norm2(tmp));
|
||||||
|
RealD true_residual = tmpnorm/srcnorm;
|
||||||
|
std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Non-convergence
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
virtual void M(Field & in,Field & out,Field & tmp) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void M1(Field & in, Field & out) {// the smoother
|
||||||
|
|
||||||
|
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
||||||
|
Field tmp(grid);
|
||||||
|
Field Min(grid);
|
||||||
|
|
||||||
|
PcgM(in,Min); // Smoother call
|
||||||
|
|
||||||
|
HermOp(Min,out);
|
||||||
|
axpy(tmp,-1.0,out,in); // tmp = in - A Min
|
||||||
|
|
||||||
|
ProjectToSubspace(tmp,PleftProj);
|
||||||
|
ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
|
||||||
|
PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
||||||
|
axpy(out,1.0,Min,tmp); // Min+tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void M2(const Field & in, Field & out) {
|
||||||
|
out=in;
|
||||||
|
// Must override for Def2 only
|
||||||
|
// case PcgDef2:
|
||||||
|
// Pright(in,out);
|
||||||
|
// break;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual RealD M3(const Field & p, Field & mmp){
  // Applies the Hermitian operator to the search direction p, leaving the
  // result in mmp, and returns the curvature p^dag A p that the solver loop
  // divides by (a = rtz/d) and stores in pAp[].
  //
  // Assumes HermOpAndNorm follows the operator convention used elsewhere in
  // this code base: third argument receives real(innerProduct(in,out)),
  // fourth receives norm2(out). The previous code returned the fourth value
  // (|A p|^2), which is not what the caller uses the result as.
  RealD pAp;      // real part of <p, A p>
  RealD ApNorm;   // |A p|^2, not needed here
  HermOpAndNorm(p,mmp,pAp,ApNorm);
  return pAp;
  // Must override for Def1 only
  // case PcgDef1:
  // d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
  // linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
  // Pleft(mp,mmp);
  // d=real(linop_d->inner(p,mmp));
}
|
||||||
|
|
||||||
|
virtual void VstartDef2(Field & xconst Field & src){
|
||||||
|
//case PcgDef2:
|
||||||
|
//case PcgAdef2:
|
||||||
|
//case PcgAdef2f:
|
||||||
|
//case PcgV11f:
|
||||||
|
///////////////////////////////////
|
||||||
|
// Choose x_0 such that
|
||||||
|
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
|
||||||
|
// = [1 - Ass_inv A] Guess + Assinv src
|
||||||
|
// = P^T guess + Assinv src
|
||||||
|
// = Vstart [Tang notation]
|
||||||
|
// This gives:
|
||||||
|
// W^T (src - A x_0) = src_s - A guess_s - r_s
|
||||||
|
// = src_s - (A guess)_s - src_s + (A guess)_s
|
||||||
|
// = 0
|
||||||
|
///////////////////////////////////
|
||||||
|
Field r(grid);
|
||||||
|
Field mmp(grid);
|
||||||
|
|
||||||
|
HermOp(x,mmp);
|
||||||
|
axpy (r, -1.0, mmp, src); // r_{-1} = src - A x
|
||||||
|
ProjectToSubspace(r,PleftProj);
|
||||||
|
ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||||
|
PromoteFromSubspace(PleftMss_proj,mmp);
|
||||||
|
x=x+mmp;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default start-vector hook: x and src are left untouched.
virtual void Vstart(Field & x,const Field & src)
{
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// Only Def1 has non-trivial Vout. Override in Def1
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// Default Vout copies in to out; src is not used.
// The Def1 variant would instead form  Qb + PT x :
//   case PcgDef1:
//     ProjectToSubspace(src,PleftProj);
//     ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
//     PromoteFromSubspace(PleftMss_proj,tmp);
//
//     Pright(in,out);
//
//     linop_d->axpy(out,tmp,out,1.0);
//     break;
virtual void Vout (Field & in, Field & out,Field & src)
{
  out = in;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Pright and Pleft are common to all implementations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Right projector
//   P_R  = [ 1              0 ]
//          [ -Mss^-1 Msb    0 ]
// out is used as scratch throughout and holds the result on return.
virtual void Pright(Field & in,Field & out)
{
  Field complement(grid);

  // complement = in - in_s  (strip the subspace component of in)
  ProjectToSubspace(in,PleftProj);
  PromoteFromSubspace(PleftProj,out);
  axpy(complement,-1.0,out,in);

  // Apply the operator to the complement and project the result
  HermOp(complement,out);
  ProjectToSubspace(out,PleftProj);          // Mssbar in_sbar  (project)

  ApplyInverse(PleftProj,PleftMss_proj);     // Mss^{-1} Mssbar
  PromoteFromSubspace(PleftMss_proj,out);

  // out = in_sbar - Mss^{-1} Mssbar in_sbar
  axpy(out,-1.0,out,complement);
}
|
||||||
|
// Left projector
//   P_L  = [ 1  -Mbs Mss^-1]
//          [ 0   0         ]
// out is used as scratch throughout and holds the result on return.
virtual void Pleft (Field & in,Field & out)
{
  Field complement(grid);
  Field projected(grid);
  Field applied(grid);

  // complement = in - in_s
  ProjectToSubspace(in,PleftProj);
  PromoteFromSubspace(PleftProj,out);
  axpy(complement,-1.0,out,in);

  // out = promoted Mss^{-1} in_s
  ApplyInverse(PleftProj,PleftMss_proj);
  PromoteFromSubspace(PleftMss_proj,out);

  HermOp(out,applied);

  // projected = subspace part of the operator result (Msbar s Mss^{-1})
  ProjectToSubspace(applied,PleftProj);
  PromoteFromSubspace(PleftProj,projected);

  // Remove the subspace part, then combine with the complement:
  // in_sbar - Msbars Mss^{-1} in_s
  axpy(out,-1.0,projected,applied);
  axpy(out,-1.0,out,complement);
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
|
||||||
|
public:
|
||||||
|
virtual void M(Field & in,Field & out,Field & tmp){
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual void M2(Field & in, Field & out){
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
template<class Field>
|
||||||
|
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
|
||||||
|
public:
|
||||||
|
virtual void M(Field & in,Field & out,Field & tmp);
|
||||||
|
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||||
|
virtual void M2(Field & in, Field & out);
|
||||||
|
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||||
|
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
|
||||||
|
public:
|
||||||
|
virtual void M(Field & in,Field & out,Field & tmp);
|
||||||
|
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||||
|
virtual void M2(Field & in, Field & out);
|
||||||
|
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||||
|
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||||
|
virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
|
||||||
|
public:
|
||||||
|
virtual void M(Field & in,Field & out,Field & tmp);
|
||||||
|
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||||
|
virtual void M2(Field & in, Field & out);
|
||||||
|
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||||
|
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
|
||||||
|
public:
|
||||||
|
virtual void M(Field & in,Field & out,Field & tmp);
|
||||||
|
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||||
|
virtual void M2(Field & in, Field & out);
|
||||||
|
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||||
|
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
#endif
|
@ -13,9 +13,7 @@ namespace Grid {
|
|||||||
public:
|
public:
|
||||||
RealD Tolerance;
|
RealD Tolerance;
|
||||||
Integer MaxIterations;
|
Integer MaxIterations;
|
||||||
int verbose;
|
|
||||||
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
||||||
verbose=1;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -42,14 +40,12 @@ public:
|
|||||||
cp =a;
|
cp =a;
|
||||||
ssq=norm2(src);
|
ssq=norm2(src);
|
||||||
|
|
||||||
if ( verbose ) {
|
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
||||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
||||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
||||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
||||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
||||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
||||||
std::cout <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
RealD rsq = Tolerance* Tolerance*ssq;
|
RealD rsq = Tolerance* Tolerance*ssq;
|
||||||
|
|
||||||
@ -58,7 +54,7 @@ public:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
|
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
|
||||||
|
|
||||||
int k;
|
int k;
|
||||||
for (k=1;k<=MaxIterations;k++){
|
for (k=1;k<=MaxIterations;k++){
|
||||||
@ -69,23 +65,19 @@ public:
|
|||||||
|
|
||||||
RealD qqck = norm2(mmp);
|
RealD qqck = norm2(mmp);
|
||||||
ComplexD dck = innerProduct(p,mmp);
|
ComplexD dck = innerProduct(p,mmp);
|
||||||
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<" qqcheck "<< qqck<< " dck "<< dck<<std::endl;
|
|
||||||
|
|
||||||
a = c/d;
|
a = c/d;
|
||||||
b_pred = a*(a*qq-d)/c;
|
b_pred = a*(a*qq-d)/c;
|
||||||
|
|
||||||
|
|
||||||
// if (verbose) std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
|
|
||||||
cp = axpy_norm(r,-a,mmp,r);
|
cp = axpy_norm(r,-a,mmp,r);
|
||||||
b = cp/c;
|
b = cp/c;
|
||||||
// std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,b "<<cp<< " "<<b <<std::endl;
|
|
||||||
|
|
||||||
// Fuse these loops ; should be really easy
|
// Fuse these loops ; should be really easy
|
||||||
psi= a*p+psi;
|
psi= a*p+psi;
|
||||||
p = p*b+r;
|
p = p*b+r;
|
||||||
|
|
||||||
if (verbose) std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||||
|
|
||||||
// Stopping condition
|
// Stopping condition
|
||||||
if ( cp <= rsq ) {
|
if ( cp <= rsq ) {
|
||||||
|
|
||||||
@ -98,13 +90,14 @@ public:
|
|||||||
RealD resnorm = sqrt(norm2(p));
|
RealD resnorm = sqrt(norm2(p));
|
||||||
RealD true_residual = resnorm/srcnorm;
|
RealD true_residual = resnorm/srcnorm;
|
||||||
|
|
||||||
std::cout<<"ConjugateGradient: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
|
||||||
std::cout<<"ConjugateGradient: true residual is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl;
|
<<" computed residual "<<sqrt(cp/ssq)
|
||||||
std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl;
|
<<" true residual "<<true_residual
|
||||||
|
<<" target "<<Tolerance<<std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout<<"ConjugateGradient did NOT converge"<<std::endl;
|
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -27,10 +27,14 @@ public:
|
|||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
|
||||||
{
|
{
|
||||||
|
|
||||||
GridBase *grid = src._grid;
|
GridBase *grid = src._grid;
|
||||||
int nshift = shifts.order;
|
int nshift = shifts.order;
|
||||||
std::vector<Field> results(nshift,grid);
|
std::vector<Field> results(nshift,grid);
|
||||||
|
(*this)(Linop,src,results,psi);
|
||||||
|
}
|
||||||
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
|
||||||
|
{
|
||||||
|
int nshift = shifts.order;
|
||||||
|
|
||||||
(*this)(Linop,src,results);
|
(*this)(Linop,src,results);
|
||||||
|
|
||||||
@ -91,7 +95,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
|||||||
cp = norm2(src);
|
cp = norm2(src);
|
||||||
for(int s=0;s<nshift;s++){
|
for(int s=0;s<nshift;s++){
|
||||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||||
std::cout<<"ConjugateGradientMultiShift: shift "<<s
|
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
|
||||||
<<" target resid "<<rsq[s]<<std::endl;
|
<<" target resid "<<rsq[s]<<std::endl;
|
||||||
ps[s] = src;
|
ps[s] = src;
|
||||||
}
|
}
|
||||||
@ -109,7 +113,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
|||||||
// p and mmp is equal to d after this since
|
// p and mmp is equal to d after this since
|
||||||
// the d computation is tricky
|
// the d computation is tricky
|
||||||
// qq = real(innerProduct(p,mmp));
|
// qq = real(innerProduct(p,mmp));
|
||||||
// std::cout << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
|
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
|
||||||
|
|
||||||
b = -cp /d;
|
b = -cp /d;
|
||||||
|
|
||||||
@ -214,7 +218,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
|||||||
|
|
||||||
if(css<rsq[s]){
|
if(css<rsq[s]){
|
||||||
if ( ! converged[s] )
|
if ( ! converged[s] )
|
||||||
std::cout<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||||
converged[s]=1;
|
converged[s]=1;
|
||||||
} else {
|
} else {
|
||||||
all_converged=0;
|
all_converged=0;
|
||||||
@ -225,8 +229,8 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
|||||||
|
|
||||||
if ( all_converged ){
|
if ( all_converged ){
|
||||||
|
|
||||||
std::cout<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||||
std::cout<< "CGMultiShift: Checking solutions"<<std::endl;
|
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||||
|
|
||||||
// Check answers
|
// Check answers
|
||||||
for(int s=0; s < nshift; s++) {
|
for(int s=0; s < nshift; s++) {
|
||||||
@ -235,13 +239,13 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
|||||||
axpy(r,-alpha[s],src,tmp);
|
axpy(r,-alpha[s],src,tmp);
|
||||||
RealD rn = norm2(r);
|
RealD rn = norm2(r);
|
||||||
RealD cn = norm2(src);
|
RealD cn = norm2(src);
|
||||||
std::cout<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// ugly hack
|
// ugly hack
|
||||||
std::cout<<"CG multi shift did not converge"<<std::endl;
|
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ namespace Grid {
|
|||||||
int verbose;
|
int verbose;
|
||||||
|
|
||||||
ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
||||||
verbose=1;
|
verbose=0;
|
||||||
};
|
};
|
||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||||
@ -37,14 +37,11 @@ namespace Grid {
|
|||||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||||
|
|
||||||
std::cout << "pAp, pAAp"<< pAp<<" "<<pAAp<<std::endl;
|
|
||||||
std::cout << "rAr, rAAr"<< rAr<<" "<<rAAr<<std::endl;
|
|
||||||
|
|
||||||
cp =norm2(r);
|
cp =norm2(r);
|
||||||
ssq=norm2(src);
|
ssq=norm2(src);
|
||||||
rsq=Tolerance*Tolerance*ssq;
|
rsq=Tolerance*Tolerance*ssq;
|
||||||
|
|
||||||
std::cout<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
if (verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||||
|
|
||||||
for(int k=1;k<MaxIterations;k++){
|
for(int k=1;k<MaxIterations;k++){
|
||||||
|
|
||||||
@ -62,22 +59,23 @@ namespace Grid {
|
|||||||
|
|
||||||
axpy(p,b,p,r);
|
axpy(p,b,p,r);
|
||||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||||
|
|
||||||
std::cout<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
if(verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||||
|
|
||||||
if(cp<rsq) {
|
if(cp<rsq) {
|
||||||
Linop.HermOp(psi,Ap);
|
Linop.HermOp(psi,Ap);
|
||||||
axpy(r,-1.0,src,Ap);
|
axpy(r,-1.0,src,Ap);
|
||||||
RealD true_resid = norm2(r);
|
RealD true_resid = norm2(r)/ssq;
|
||||||
std::cout<<"ConjugateResidual: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
|
||||||
std::cout<<"ConjugateResidual: true residual is "<<true_resid<<std::endl;
|
<< " computed residual "<<sqrt(cp/ssq)
|
||||||
std::cout<<"ConjugateResidual: target residual was "<<Tolerance <<std::endl;
|
<< " true residual "<<sqrt(true_resid)
|
||||||
|
<< " target " <<Tolerance <<std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout<<"ConjugateResidual did NOT converge"<<std::endl;
|
std::cout<<GridLogMessage<<"ConjugateResidual did NOT converge"<<std::endl;
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
109
lib/algorithms/iterative/DenseMatrix.h
Normal file
109
lib/algorithms/iterative/DenseMatrix.h
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#ifndef GRID_DENSE_MATRIX_H
|
||||||
|
#define GRID_DENSE_MATRIX_H
|
||||||
|
|
||||||
|
namespace Grid {
  /////////////////////////////////////////////////////////////
  // Matrix utils
  //
  // A DenseMatrix is a vector of row vectors; mat[i][j] is row i, column j.
  // The column count is taken from row 0, so rows are expected to have
  // equal length.
  /////////////////////////////////////////////////////////////

  template<class T> using DenseVector = std::vector<T>;
  template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;

  /** Report the length of a vector in N **/
  template<class T> void Size(const DenseVector<T> & vec, int &N)
  {
    N = static_cast<int>(vec.size());
  }

  /** Report rows (N) and columns (M); an empty matrix reports 0 x 0 **/
  template<class T> void Size(const DenseMatrix<T> & mat, int &N,int &M)
  {
    N = static_cast<int>(mat.size());
    // Guard row 0: indexing it on an empty matrix would be out of bounds.
    M = mat.empty() ? 0 : static_cast<int>(mat[0].size());
  }

  /** Report the dimension of a square matrix; asserts that it is square **/
  template<class T> void SizeSquare(const DenseMatrix<T> & mat, int &N)
  {
    int M; Size(mat,N,M);
    assert(N==M);
  }

  /** Resize a vector to N entries **/
  template<class T> void Resize(DenseVector<T > & mat, int N) {
    mat.resize(N);
  }

  /** Resize a matrix to N rows of M entries each **/
  template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {
    mat.resize(N);
    for(int i=0;i<N;i++){
      mat[i].resize(M);
    }
  }

  /** Set every entry of the matrix to val (literals are accepted) **/
  template<class T> void Fill(DenseMatrix<T> & mat, const T & val) {
    int N,M;
    Size(mat,N,M);
    for(int i=0;i<N;i++){
    for(int j=0;j<M;j++){
      mat[i][j] = val;
    }}
  }

  /** Transpose of a matrix **/
  template<class T> DenseMatrix<T> Transpose(const DenseMatrix<T> & mat){
    int N,M;
    Size(mat,N,M);
    DenseMatrix<T> C; Resize(C,M,N);
    for(int i=0;i<M;i++){
    for(int j=0;j<N;j++){
      C[i][j] = mat[j][i];
    }}
    return C;
  }

  /** Set DenseMatrix to unit matrix **/
  template<class T> void Unity(DenseMatrix<T> &A){
    int N;  SizeSquare(A,N);
    for(int i=0;i<N;i++){
      for(int j=0;j<N;j++){
        if ( i==j ) A[i][j] = 1;
        else        A[i][j] = 0;
      }
    }
  }

  /** Add C * I to matrix **/
  template<class T>
  void PlusUnit(DenseMatrix<T> & A,T c){
    int dim;  SizeSquare(A,dim);
    for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
  }

  /** return the Hermitian conjugate of matrix **/
  template<class T>
  DenseMatrix<T> HermitianConj(const DenseMatrix<T> &mat){

    int dim; SizeSquare(mat,dim);

    DenseMatrix<T> C; Resize(C,dim,dim);

    // conj is left unqualified so that it resolves for whatever T is used.
    for(int i=0;i<dim;i++){
      for(int j=0;j<dim;j++){
        C[i][j] = conj(mat[j][i]);
      }
    }
    return C;
  }

  /** Get a (rectangular) submatrix: rows [row_st,row_end), columns [col_st,col_end) **/
  template <class T>
  DenseMatrix<T> GetSubMtx(const DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
  {
    DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);

    for(int i = row_st; i<row_end; i++){
    for(int j = col_st; j<col_end; j++){
      H[i-row_st][j-col_st]=A[i][j];
    }}
    return H;
  }

}
|
||||||
|
|
||||||
|
#include <algorithms/iterative/Householder.h>
|
||||||
|
#include <algorithms/iterative/Francis.h>
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
52
lib/algorithms/iterative/EigenSort.h
Normal file
52
lib/algorithms/iterative/EigenSort.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#ifndef GRID_EIGENSORT_H
|
||||||
|
#define GRID_EIGENSORT_H
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Eigen sorter to begin with
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class SortEigen {
|
||||||
|
private:
|
||||||
|
|
||||||
|
static bool less_lmd(RealD left,RealD right){
|
||||||
|
return fabs(left) < fabs(right);
|
||||||
|
}
|
||||||
|
static bool less_pair(std::pair<RealD,Field>& left,
|
||||||
|
std::pair<RealD,Field>& right){
|
||||||
|
return fabs(left.first) < fabs(right.first);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
void push(DenseVector<RealD>& lmd,
|
||||||
|
DenseVector<Field>& evec,int N) {
|
||||||
|
|
||||||
|
DenseVector<std::pair<RealD, Field> > emod;
|
||||||
|
typename DenseVector<std::pair<RealD, Field> >::iterator it;
|
||||||
|
|
||||||
|
for(int i=0;i<lmd.size();++i){
|
||||||
|
emod.push_back(std::pair<RealD,Field>(lmd[i],evec[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
|
||||||
|
|
||||||
|
it=emod.begin();
|
||||||
|
for(int i=0;i<N;++i){
|
||||||
|
lmd[i]=it->first;
|
||||||
|
evec[i]=it->second;
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void push(DenseVector<RealD>& lmd,int N) {
|
||||||
|
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
|
||||||
|
}
|
||||||
|
bool saturated(RealD lmd, RealD thrs) {
|
||||||
|
return fabs(lmd) > fabs(thrs);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
498
lib/algorithms/iterative/Francis.h
Normal file
498
lib/algorithms/iterative/Francis.h
Normal file
@ -0,0 +1,498 @@
|
|||||||
|
#ifndef FRANCIS_H
|
||||||
|
#define FRANCIS_H
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
#include <cmath>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <fstream>
|
||||||
|
#include <complex>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
//#include <timer.h>
|
||||||
|
//#include <lapacke.h>
|
||||||
|
//#include <Eigen/Dense>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
|
||||||
|
template <class T> int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
|
||||||
|
|
||||||
|
/**
|
||||||
|
Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
|
||||||
|
H =
|
||||||
|
x x x x x x x x x
|
||||||
|
x x x x x x x x x
|
||||||
|
0 x x x x x x x x
|
||||||
|
0 0 x x x x x x x
|
||||||
|
0 0 0 x x x x x x
|
||||||
|
0 0 0 0 x x x x x
|
||||||
|
0 0 0 0 0 x x x x
|
||||||
|
0 0 0 0 0 0 x x x
|
||||||
|
0 0 0 0 0 0 0 x x
|
||||||
|
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.
|
||||||
|
**/
|
||||||
|
template <class T>
|
||||||
|
int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
|
||||||
|
{
|
||||||
|
DenseMatrix<T> H = Hin;
|
||||||
|
|
||||||
|
int N ; SizeSquare(H,N);
|
||||||
|
int M = N;
|
||||||
|
|
||||||
|
Fill(evals,0);
|
||||||
|
Fill(evecs,0);
|
||||||
|
|
||||||
|
T s,t,x=0,y=0,z=0;
|
||||||
|
T u,d;
|
||||||
|
T apd,amd,bc;
|
||||||
|
DenseVector<T> p(N,0);
|
||||||
|
T nrm = Norm(H); ///DenseMatrix Norm
|
||||||
|
int n, m;
|
||||||
|
int e = 0;
|
||||||
|
int it = 0;
|
||||||
|
int tot_it = 0;
|
||||||
|
int l = 0;
|
||||||
|
int r = 0;
|
||||||
|
DenseMatrix<T> P; Resize(P,N,N); Unity(P);
|
||||||
|
DenseVector<int> trows(N,0);
|
||||||
|
|
||||||
|
/// Check if the matrix is really hessenberg, if not abort
|
||||||
|
RealD sth = 0;
|
||||||
|
for(int j=0;j<N;j++){
|
||||||
|
for(int i=j+2;i<N;i++){
|
||||||
|
sth = abs(H[i][j]);
|
||||||
|
if(sth > small){
|
||||||
|
std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
do{
|
||||||
|
std::cout << "Francis QR Step N = " << N << std::endl;
|
||||||
|
/** Check for convergence
|
||||||
|
x x x x x
|
||||||
|
0 x x x x
|
||||||
|
0 0 x x x
|
||||||
|
0 0 x x x
|
||||||
|
0 0 0 0 x
|
||||||
|
for this matrix l = 4
|
||||||
|
**/
|
||||||
|
do{
|
||||||
|
l = Chop_subdiag(H,nrm,e,small);
|
||||||
|
r = 0; ///May have converged on more than one eval
|
||||||
|
///Single eval
|
||||||
|
if(l == N-1){
|
||||||
|
evals[e] = H[l][l];
|
||||||
|
N--; e++; r++; it = 0;
|
||||||
|
}
|
||||||
|
///RealD eval
|
||||||
|
if(l == N-2){
|
||||||
|
trows[l+1] = 1; ///Needed for UTSolve
|
||||||
|
apd = H[l][l] + H[l+1][l+1];
|
||||||
|
amd = H[l][l] - H[l+1][l+1];
|
||||||
|
bc = (T)4.0*H[l+1][l]*H[l][l+1];
|
||||||
|
evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) );
|
||||||
|
evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
|
||||||
|
N-=2; e+=2; r++; it = 0;
|
||||||
|
}
|
||||||
|
} while(r>0);
|
||||||
|
|
||||||
|
if(N ==0) break;
|
||||||
|
|
||||||
|
DenseVector<T > ck; Resize(ck,3);
|
||||||
|
DenseVector<T> v; Resize(v,3);
|
||||||
|
|
||||||
|
for(int m = N-3; m >= l; m--){
|
||||||
|
///Starting vector essentially random shift.
|
||||||
|
if(it%10 == 0 && N >= 3 && it > 0){
|
||||||
|
s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
|
||||||
|
t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
|
||||||
|
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
|
||||||
|
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
|
||||||
|
z = H[m+1][m]*H[m+2][m+1];
|
||||||
|
}
|
||||||
|
///Starting vector implicit Q theorem
|
||||||
|
else{
|
||||||
|
s = (H[N-2][N-2] + H[N-1][N-1]);
|
||||||
|
t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
|
||||||
|
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
|
||||||
|
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
|
||||||
|
z = H[m+1][m]*H[m+2][m+1];
|
||||||
|
}
|
||||||
|
ck[0] = x; ck[1] = y; ck[2] = z;
|
||||||
|
|
||||||
|
if(m == l) break;
|
||||||
|
|
||||||
|
/** Some stupid thing from numerical recipies, seems to work**/
|
||||||
|
// PAB.. for heaven's sake quote page, purpose, evidence it works.
|
||||||
|
// what sort of comment is that!?!?!?
|
||||||
|
u=abs(H[m][m-1])*(abs(y)+abs(z));
|
||||||
|
d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
|
||||||
|
if ((T)abs(u+d) == (T)abs(d) ){
|
||||||
|
l = m; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
//if (u < small){l = m; break;}
|
||||||
|
}
|
||||||
|
if(it > 100000){
|
||||||
|
std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
|
||||||
|
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
normalize(ck); ///Normalization cancels in PHP anyway
|
||||||
|
T beta;
|
||||||
|
Householder_vector<T >(ck, 0, 2, v, beta);
|
||||||
|
Householder_mult<T >(H,v,beta,0,l,l+2,0);
|
||||||
|
Householder_mult<T >(H,v,beta,0,l,l+2,1);
|
||||||
|
///Accumulate eigenvector
|
||||||
|
Householder_mult<T >(P,v,beta,0,l,l+2,1);
|
||||||
|
int sw = 0; ///Are we on the last row?
|
||||||
|
for(int k=l;k<N-2;k++){
|
||||||
|
x = H[k+1][k];
|
||||||
|
y = H[k+2][k];
|
||||||
|
z = (T)0.0;
|
||||||
|
if(k+3 <= N-1){
|
||||||
|
z = H[k+3][k];
|
||||||
|
} else{
|
||||||
|
sw = 1;
|
||||||
|
v[2] = (T)0.0;
|
||||||
|
}
|
||||||
|
ck[0] = x; ck[1] = y; ck[2] = z;
|
||||||
|
normalize(ck);
|
||||||
|
Householder_vector<T >(ck, 0, 2-sw, v, beta);
|
||||||
|
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
|
||||||
|
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
|
||||||
|
///Accumulate eigenvector
|
||||||
|
Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
|
||||||
|
}
|
||||||
|
it++;
|
||||||
|
tot_it++;
|
||||||
|
}while(N > 1);
|
||||||
|
N = evals.size();
|
||||||
|
///Annoying - UT solves in reverse order;
|
||||||
|
DenseVector<T> tmp; Resize(tmp,N);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
tmp[i] = evals[N-i-1];
|
||||||
|
}
|
||||||
|
evals = tmp;
|
||||||
|
UTeigenvectors(H, trows, evals, evecs);
|
||||||
|
for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
|
||||||
|
return tot_it;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
|
||||||
|
H =
|
||||||
|
x x 0 0 0 0
|
||||||
|
x x x 0 0 0
|
||||||
|
0 x x x 0 0
|
||||||
|
0 0 x x x 0
|
||||||
|
0 0 0 x x x
|
||||||
|
0 0 0 0 x x
|
||||||
|
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. **/
|
||||||
|
return my_Wilkinson(Hin, evals, evecs, small, small);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
|
||||||
|
{
|
||||||
|
int N; SizeSquare(Hin,N);
|
||||||
|
int M = N;
|
||||||
|
|
||||||
|
///I don't want to modify the input but matricies must be passed by reference
|
||||||
|
//Scale a matrix by its "norm"
|
||||||
|
//RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm);
|
||||||
|
DenseMatrix<T> H; H = Hin;
|
||||||
|
|
||||||
|
RealD Hnorm = abs(Norm(Hin));
|
||||||
|
H = H * (1.0 / Hnorm);
|
||||||
|
|
||||||
|
// TODO use openmp and memset
|
||||||
|
Fill(evals,0);
|
||||||
|
Fill(evecs,0);
|
||||||
|
|
||||||
|
T s, t, x = 0, y = 0, z = 0;
|
||||||
|
T u, d;
|
||||||
|
T apd, amd, bc;
|
||||||
|
DenseVector<T> p; Resize(p,N); Fill(p,0);
|
||||||
|
|
||||||
|
T nrm = Norm(H); ///DenseMatrix Norm
|
||||||
|
int n, m;
|
||||||
|
int e = 0;
|
||||||
|
int it = 0;
|
||||||
|
int tot_it = 0;
|
||||||
|
int l = 0;
|
||||||
|
int r = 0;
|
||||||
|
DenseMatrix<T> P; Resize(P,N,N);
|
||||||
|
Unity(P);
|
||||||
|
DenseVector<int> trows(N, 0);
|
||||||
|
/// Check if the matrix is really symm tridiag
|
||||||
|
RealD sth = 0;
|
||||||
|
for(int j = 0; j < N; ++j)
|
||||||
|
{
|
||||||
|
for(int i = j + 2; i < N; ++i)
|
||||||
|
{
|
||||||
|
if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
|
||||||
|
{
|
||||||
|
std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
|
||||||
|
std::cout << "Warning tridiagonalize and call again" << std::endl;
|
||||||
|
// exit(1); // see what is going on
|
||||||
|
//return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
do{
|
||||||
|
do{
|
||||||
|
//Jasper
|
||||||
|
//Check if the subdiagonal term is small enough (<small)
|
||||||
|
//if true then it is converged.
|
||||||
|
//check start from H.dim - e - 1
|
||||||
|
//How to deal with more than 2 are converged?
|
||||||
|
//What if Chop_symm_subdiag return something int the middle?
|
||||||
|
//--------------
|
||||||
|
l = Chop_symm_subdiag(H,nrm, e, small);
|
||||||
|
r = 0; ///May have converged on more than one eval
|
||||||
|
//Jasper
|
||||||
|
//In this case
|
||||||
|
// x x 0 0 0 0
|
||||||
|
// x x x 0 0 0
|
||||||
|
// 0 x x x 0 0
|
||||||
|
// 0 0 x x x 0
|
||||||
|
// 0 0 0 x x 0
|
||||||
|
// 0 0 0 0 0 x <- l
|
||||||
|
//--------------
|
||||||
|
///Single eval
|
||||||
|
if(l == N - 1)
|
||||||
|
{
|
||||||
|
evals[e] = H[l][l];
|
||||||
|
N--;
|
||||||
|
e++;
|
||||||
|
r++;
|
||||||
|
it = 0;
|
||||||
|
}
|
||||||
|
//Jasper
|
||||||
|
// x x 0 0 0 0
|
||||||
|
// x x x 0 0 0
|
||||||
|
// 0 x x x 0 0
|
||||||
|
// 0 0 x x 0 0
|
||||||
|
// 0 0 0 0 x x <- l
|
||||||
|
// 0 0 0 0 x x
|
||||||
|
//--------------
|
||||||
|
///RealD eval
|
||||||
|
if(l == N - 2)
|
||||||
|
{
|
||||||
|
trows[l + 1] = 1; ///Needed for UTSolve
|
||||||
|
apd = H[l][l] + H[l + 1][ l + 1];
|
||||||
|
amd = H[l][l] - H[l + 1][l + 1];
|
||||||
|
bc = (T) 4.0 * H[l + 1][l] * H[l][l + 1];
|
||||||
|
evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
|
||||||
|
evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
|
||||||
|
N -= 2;
|
||||||
|
e += 2;
|
||||||
|
r++;
|
||||||
|
it = 0;
|
||||||
|
}
|
||||||
|
}while(r > 0);
|
||||||
|
//Jasper
|
||||||
|
//Already converged
|
||||||
|
//--------------
|
||||||
|
if(N == 0) break;
|
||||||
|
|
||||||
|
DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
|
||||||
|
|
||||||
|
for(int m = N - 3; m >= l; m--)
|
||||||
|
{
|
||||||
|
///Starting vector essentially random shift.
|
||||||
|
if(it%10 == 0 && N >= 3 && it > 0)
|
||||||
|
{
|
||||||
|
t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
|
||||||
|
x = H[m][m] - t;
|
||||||
|
z = H[m + 1][m];
|
||||||
|
} else {
|
||||||
|
///Starting vector implicit Q theorem
|
||||||
|
d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
|
||||||
|
t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]
|
||||||
|
/ (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
|
||||||
|
x = H[m][m] - t;
|
||||||
|
z = H[m + 1][m];
|
||||||
|
}
|
||||||
|
//Jasper
|
||||||
|
//why it is here????
|
||||||
|
//-----------------------
|
||||||
|
if(m == l)
|
||||||
|
break;
|
||||||
|
|
||||||
|
u = abs(H[m][m - 1]) * (abs(y) + abs(z));
|
||||||
|
d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
|
||||||
|
if ((T)abs(u + d) == (T)abs(d))
|
||||||
|
{
|
||||||
|
l = m;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Jasper
|
||||||
|
if(it > 1000000)
|
||||||
|
{
|
||||||
|
std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
|
||||||
|
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
//
|
||||||
|
T s, c;
|
||||||
|
Givens_calc<T>(x, z, c, s);
|
||||||
|
Givens_mult<T>(H, l, l + 1, c, -s, 0);
|
||||||
|
Givens_mult<T>(H, l, l + 1, c, s, 1);
|
||||||
|
Givens_mult<T>(P, l, l + 1, c, s, 1);
|
||||||
|
//
|
||||||
|
for(int k = l; k < N - 2; ++k)
|
||||||
|
{
|
||||||
|
x = H.A[k + 1][k];
|
||||||
|
z = H.A[k + 2][k];
|
||||||
|
Givens_calc<T>(x, z, c, s);
|
||||||
|
Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
|
||||||
|
Givens_mult<T>(H, k + 1, k + 2, c, s, 1);
|
||||||
|
Givens_mult<T>(P, k + 1, k + 2, c, s, 1);
|
||||||
|
}
|
||||||
|
it++;
|
||||||
|
tot_it++;
|
||||||
|
}while(N > 1);
|
||||||
|
|
||||||
|
N = evals.size();
|
||||||
|
///Annoying - UT solves in reverse order;
|
||||||
|
DenseVector<T> tmp(N);
|
||||||
|
for(int i = 0; i < N; ++i)
|
||||||
|
tmp[i] = evals[N-i-1];
|
||||||
|
evals = tmp;
|
||||||
|
//
|
||||||
|
UTeigenvectors(H, trows, evals, evecs);
|
||||||
|
//UTSymmEigenvectors(H, trows, evals, evecs);
|
||||||
|
for(int i = 0; i < evals.size(); ++i)
|
||||||
|
{
|
||||||
|
evecs[i] = P * evecs[i];
|
||||||
|
normalize(evecs[i]);
|
||||||
|
evals[i] = evals[i] * Hnorm;
|
||||||
|
}
|
||||||
|
// // FIXME this is to test
|
||||||
|
// Hin.write("evecs3", evecs);
|
||||||
|
// Hin.write("evals3", evals);
|
||||||
|
// // check rsd
|
||||||
|
// for(int i = 0; i < M; i++) {
|
||||||
|
// vector<T> Aevec = Hin * evecs[i];
|
||||||
|
// RealD norm2(0.);
|
||||||
|
// for(int j = 0; j < M; j++) {
|
||||||
|
// norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
return tot_it;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
|
||||||
|
|
||||||
|
/**
|
||||||
|
turn a matrix A =
|
||||||
|
x x x x x
|
||||||
|
x x x x x
|
||||||
|
x x x x x
|
||||||
|
x x x x x
|
||||||
|
x x x x x
|
||||||
|
into
|
||||||
|
x x x x x
|
||||||
|
x x x x x
|
||||||
|
0 x x x x
|
||||||
|
0 0 x x x
|
||||||
|
0 0 0 x x
|
||||||
|
with householder rotations
|
||||||
|
Slow.
|
||||||
|
*/
|
||||||
|
int N ; SizeSquare(A,N);
|
||||||
|
DenseVector<T > p; Resize(p,N); Fill(p,0);
|
||||||
|
|
||||||
|
for(int k=start;k<N-2;k++){
|
||||||
|
//cerr << "hess" << k << std::endl;
|
||||||
|
DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
|
||||||
|
for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);} ///kth column
|
||||||
|
normalize(ck); ///Normalization cancels in PHP anyway
|
||||||
|
T beta;
|
||||||
|
Householder_vector<T >(ck, 0, ck.size()-1, v, beta); ///Householder vector
|
||||||
|
Householder_mult<T>(A,v,beta,start,k+1,N-1,0); ///A -> PA
|
||||||
|
Householder_mult<T >(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H
|
||||||
|
///Accumulate eigenvector
|
||||||
|
Householder_mult<T >(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H
|
||||||
|
}
|
||||||
|
/*for(int l=0;l<N-2;l++){
|
||||||
|
for(int k=l+2;k<N;k++){
|
||||||
|
A(0,k,l);
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
|
||||||
|
///Tridiagonalize a matrix
|
||||||
|
int N; SizeSquare(A,N);
|
||||||
|
Hess(A,Q,start);
|
||||||
|
/*for(int l=0;l<N-2;l++){
|
||||||
|
for(int k=l+2;k<N;k++){
|
||||||
|
A(0,l,k);
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void ForceTridiagonal(DenseMatrix<T> &A){
|
||||||
|
///Tridiagonalize a matrix
|
||||||
|
int N ; SizeSquare(A,N);
|
||||||
|
for(int l=0;l<N-2;l++){
|
||||||
|
for(int k=l+2;k<N;k++){
|
||||||
|
A[l][k]=0;
|
||||||
|
A[k][l]=0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
|
||||||
|
///Solve a symmetric eigensystem, not necessarily in tridiagonal form
|
||||||
|
int N; SizeSquare(Ain,N);
|
||||||
|
DenseMatrix<T > A; A = Ain;
|
||||||
|
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
|
||||||
|
Tri(A,Q,0);
|
||||||
|
int it = my_Wilkinson<T>(A, evals, evecs, small);
|
||||||
|
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
|
||||||
|
return it;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
|
||||||
|
return my_Wilkinson(Ain, evals, evecs, small);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
|
||||||
|
return my_SymmEigensystem(Ain, evals, evecs, small);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
|
||||||
|
///Solve a general eigensystem, not necessarily in tridiagonal form
|
||||||
|
int N = Ain.dim;
|
||||||
|
DenseMatrix<T > A(N); A = Ain;
|
||||||
|
DenseMatrix<T > Q(N);Q.Unity();
|
||||||
|
Hess(A,Q,0);
|
||||||
|
int it = QReigensystem<T>(A, evals, evecs, small);
|
||||||
|
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
|
||||||
|
return it;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
215
lib/algorithms/iterative/Householder.h
Normal file
215
lib/algorithms/iterative/Householder.h
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
#ifndef HOUSEHOLDER_H
|
||||||
|
#define HOUSEHOLDER_H
|
||||||
|
|
||||||
|
// Trace macros: print the current function, file and line through GridLogMessage.
// __func__ is the standard predefined identifier for the enclosing function name.
// The argument of TIMER is accepted but not used.
#define TIMER(A) std::cout << GridLogMessage << __func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
#include <cmath>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <fstream>
|
||||||
|
#include <complex>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
/** Magnitude ordering: true when |i| is strictly smaller than |j|.
    Used as the comparator when searching for the largest-magnitude element. **/
template <class T> bool cf(T i, T j) {
  const auto lhs_magnitude = abs(i);
  const auto rhs_magnitude = abs(j);
  return rhs_magnitude > lhs_magnitude;
}
|
||||||
|
|
||||||
|
/**
  Calculate a real Givens angle.

  Given the pair (y, z), writes the rotation coefficients into c and s.
  When z is exactly zero no rotation is needed and the identity rotation
  (c = 1, s = 0) is returned; returning early avoids the 0/0 that the
  general formulae produce when y is zero as well.
**/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){

  const auto mz = abs(z);
  const auto my = abs(y);

  if(mz==0.0){
    c = 1; s = 0;
    return;
  }

  // Divide by the larger magnitude so that |t| <= 1.
  if(mz >= my){
    T t = -y/z;
    s = (T)1.0 / sqrt ((T)1.0 + t * t);
    c = s * t;
  } else {
    T t = -z/y;
    c = (T)1.0 / sqrt ((T)1.0 + t * t);
    s = c * t;
  }
}
|
||||||
|
|
||||||
|
template <class T> inline void Givens_mult(DenseMatrix<T> &A, int i, int k, T c, T s, int dir)
{
  // Apply a Givens rotation with coefficients (c, s) to A in place.
  //   dir == 0 : mixes rows i and k
  //   dir == 1 : mixes columns i and k
  // Any other value of dir leaves A untouched.
  int dim ; SizeSquare(A,dim);

  if(dir == 0){
    for(int col=0; col<dim; ++col){
      const T upper = A[i][col];
      const T lower = A[k][col];
      A[i][col] = (c*upper + s*lower);
      A[k][col] = (-s*upper + c*lower);
    }
  } else if(dir == 1){
    for(int row=0; row<dim; ++row){
      const T left  = A[row][i];
      const T right = A[row][k];
      A[row][i] = (c*left - s*right);
      A[row][k] = (s*left + c*right);
    }
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
from input = x;
|
||||||
|
Compute the complex Householder vector, v, such that
|
||||||
|
P = (I - b v transpose(v) )
|
||||||
|
b = 2/v.v
|
||||||
|
|
||||||
|
P | x | | x | k = 0
|
||||||
|
| x | | 0 |
|
||||||
|
| x | = | 0 |
|
||||||
|
| x | | 0 | j = 3
|
||||||
|
| x | | x |
|
||||||
|
|
||||||
|
These are the "Unreduced" Householder vectors.
|
||||||
|
|
||||||
|
**/
|
||||||
|
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
{
  // Builds the Householder vector v and coefficient beta from input[k..j].
  // The entries are first divided by the largest-magnitude element m.
  // If that element is zero, v[k..j] is zeroed and beta is set to zero so
  // that callers testing abs(beta) > 0 never read an unset value.
  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );

  if(abs(m) > 0.0){
    T alpha = 0;

    for(int i=k; i<j+1; i++){
      v[i] = input[i]/m;
      alpha = alpha + v[i]*conj(v[i]);
    }
    alpha = sqrt(alpha);   // norm of the scaled segment
    beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));

    // Add alpha along the phase of v[k]; a zero pivot gets -alpha.
    if(abs(v[k]) > 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
    else v[k] = -alpha;
  } else{
    for(int i=k; i<j+1; i++){
      v[i] = 0.0;
    }
    beta = 0.0;
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
from input = x;
|
||||||
|
Compute the complex Householder vector, v, such that
|
||||||
|
P = (I - b v transpose(v) )
|
||||||
|
b = 2/v.v
|
||||||
|
|
||||||
|
Px = alpha*e_dir
|
||||||
|
|
||||||
|
These are the "Unreduced" Householder vectors.
|
||||||
|
|
||||||
|
**/
|
||||||
|
|
||||||
|
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
{
  // Variant of Householder_vector whose pivot is entry `dir` rather than k.
  // Builds v and beta from input[k..j] after dividing by the
  // largest-magnitude element m. If that element is zero, v[k..j] is zeroed
  // and beta is set to zero so that callers testing abs(beta) > 0 never read
  // an unset value.
  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );

  if(abs(m) > 0.0){
    T alpha = 0;

    for(int i=k; i<j+1; i++){
      v[i] = input[i]/m;
      alpha = alpha + v[i]*conj(v[i]);
    }

    alpha = sqrt(alpha);   // norm of the scaled segment
    beta = (T)1.0/(alpha*(alpha + abs(v[dir]) ));

    // Add alpha along the phase of v[dir]; a zero pivot gets -alpha.
    if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
    else v[dir] = -alpha;
  }else{
    for(int i=k; i<j+1; i++){
      v[i] = 0.0;
    }
    beta = 0.0;
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Compute the product PA if trans = 0
|
||||||
|
AP if trans = 1
|
||||||
|
P = (I - b v transpose(v) )
|
||||||
|
b = 2/v.v
|
||||||
|
start at element l of matrix A
|
||||||
|
v is of length j - k + 1 of v are nonzero
|
||||||
|
**/
|
||||||
|
|
||||||
|
template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
{
  // Apply the reflection described by (v, beta) to A in place.
  //   trans == 0 : acts on rows k..j of every column p in [l, N)
  //   otherwise  : acts on columns k..j of every row p in [l, N)
  // v is indexed relative to k (v[0] pairs with row/column k).
  // A zero beta means "no reflection" and leaves A untouched.
  // NOTE(review): both branches use conj(v) in the projection and in the
  // update; confirm this is the intended convention for complex T.
  int dim ; SizeSquare(A,dim);

  if(!(abs(beta) > 0.0)) return;

  if(trans==0){
    for(int p=l; p<dim; ++p){
      T dot = 0;
      for(int row=k; row<=j; ++row) dot += conj(v[row-k])*A[row][p];
      dot *= beta;
      for(int row=k; row<=j; ++row) A[row][p] = A[row][p]-dot*conj(v[row-k]);
    }
  } else {
    for(int p=l; p<dim; ++p){
      T dot = 0;
      for(int col=k; col<=j; ++col) dot += conj(v[col-k])*A[p][col];
      dot *= beta;
      for(int col=k; col<=j; ++col) A[p][col] = A[p][col]-dot*conj(v[col-k]);
    }
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Compute the product PA if trans = 0
|
||||||
|
AP if trans = 1
|
||||||
|
P = (I - b v transpose(v) )
|
||||||
|
b = 2/v.v
|
||||||
|
start at element l of matrix A
|
||||||
|
v is of length j - k + 1 of v are nonzero
|
||||||
|
A is tridiagonal
|
||||||
|
**/
|
||||||
|
template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
{
  // Apply the reflection described by (v, beta) to A in place, restricted to
  // the rows/columns p in [l, M).
  //   trans == 0 : acts on rows k..j of column p
  //   otherwise  : acts on columns k..j of row p
  // v is indexed relative to k. All corrections are first collected in tmp
  // and only then added to A. A zero beta leaves A untouched.
  if(abs(beta) > 0.0){

    int N ; SizeSquare(A,N);

    DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);

    T s;
    for(int p=l; p<M; p++){
      s = 0;
      if(trans==0){
        for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
      }else{
        for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
      }
      s = beta*s;
      if(trans==0){
        // tmp is indexed with [][] like every other matrix access here
        for(int i=k;i<j+1;i++) tmp[i][p] = tmp[i][p] - s*v[i-k];
      }else{
        for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
      }
    }
    for(int p=l; p<M; p++){
      if(trans==0){
        for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
      }else{
        for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
      }
    }
  }
}
|
||||||
|
}
|
||||||
|
#endif
|
1006
lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
1006
lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
File diff suppressed because it is too large
Load Diff
426
lib/algorithms/iterative/Matrix.h
Normal file
426
lib/algorithms/iterative/Matrix.h
Normal file
@ -0,0 +1,426 @@
|
|||||||
|
#ifndef MATRIX_H
|
||||||
|
#define MATRIX_H
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
#include <cmath>
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <complex>
|
||||||
|
#include <typeinfo>
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
/** Sign function: p divided by its magnitude (no guard for p == 0) **/
template <class T> T sign(T p){
  const auto magnitude = abs(p);
  return p / magnitude;
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// A Vector is the STL vector; a Matrix is a vector of row vectors.
// (An alias cannot be defined in terms of itself, so Vector names std::vector.)
template<class T> using Vector = std::vector<T>;
template<class T> using Matrix = Vector<Vector<T> >;
|
||||||
|
|
||||||
|
// Resize a Vector to hold N elements.
template<class T> void Resize(Vector<T > & vec, int N)
{
  vec.resize(N);
}
|
||||||
|
|
||||||
|
// Resize a Matrix to N rows, each holding M elements.
template<class T> void Resize(Matrix<T > & mat, int N, int M) {
  mat.resize(N);
  for(auto &row : mat){
    row.resize(M);
  }
}
|
||||||
|
// Report the number of elements of vec through N.
template<class T> void Size(Vector<T> & vec, int &N)
{
  const int count = vec.size();
  N = count;
}
|
||||||
|
// Report the shape of mat: N rows, and M taken from the length of the first
// row. An empty matrix reports M = 0 instead of indexing a missing row.
template<class T> void Size(Matrix<T> & mat, int &N,int &M)
{
  N= mat.size();
  M= mat.empty() ? 0 : mat[0].size();
}
|
||||||
|
// Report the dimension N of a square matrix; asserts that rows == columns.
template<class T> void SizeSquare(Matrix<T> & mat, int &N)
{
  int columns;
  Size(mat,N,columns);
  assert(columns==N);
}
|
||||||
|
// Report the shape (N1 x M1) of mat1 and assert that mat2 has the same shape.
template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)
{
  Size(mat1,N1,M1);

  int rows2, cols2;
  Size(mat2,rows2,cols2);

  assert(rows2==N1);
  assert(cols2==M1);
}
|
||||||
|
|
||||||
|
//*****************************************
|
||||||
|
//* (Complex) Vector operations *
|
||||||
|
//*****************************************
|
||||||
|
|
||||||
|
/**Conj of a Vector **/
|
||||||
|
template <class T> Vector<T> conj(Vector<T> p){
|
||||||
|
Vector<T> q(p.size());
|
||||||
|
for(int i=0;i<p.size();i++){q[i] = conj(p[i]);}
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Norm of a Vector**/
|
||||||
|
template <class T> T norm(Vector<T> p){
|
||||||
|
T sum = 0;
|
||||||
|
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
|
||||||
|
return abs(sqrt(sum));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Norm squared of a Vector **/
|
||||||
|
template <class T> T norm2(Vector<T> p){
|
||||||
|
T sum = 0;
|
||||||
|
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
|
||||||
|
return abs((sum));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sum elements of a Vector **/
|
||||||
|
template <class T> T trace(Vector<T> p){
|
||||||
|
T sum = 0;
|
||||||
|
for(int i=0;i<p.size();i++){sum = sum + p[i];}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fill a Vector with constant c **/
|
||||||
|
template <class T> void Fill(Vector<T> &p, T c){
|
||||||
|
for(int i=0;i<p.size();i++){p[i] = c;}
|
||||||
|
}
|
||||||
|
/** Normalize a Vector **/
|
||||||
|
template <class T> void normalize(Vector<T> &p){
|
||||||
|
T m = norm(p);
|
||||||
|
if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;}
|
||||||
|
}
|
||||||
|
/** Vector by scalar **/
|
||||||
|
template <class T, class U> Vector<T> times(Vector<T> p, U s){
|
||||||
|
for(int i=0;i<p.size();i++){p[i] *= s;}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
// Scalar by vector: same result as times(p, s) with the arguments swapped.
template <class T, class U> Vector<T> times(U s, Vector<T> p){
  for(T &element : p){
    element *= s;
  }
  return p;
}
|
||||||
|
/** inner product of a and b = conj(a) . b **/
|
||||||
|
template <class T> T inner(Vector<T> a, Vector<T> b){
|
||||||
|
T m = 0.;
|
||||||
|
for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
/** sum of a and b = a + b **/
|
||||||
|
template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
|
||||||
|
Vector<T> m(a.size());
|
||||||
|
for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
/** sum of a and b = a - b **/
|
||||||
|
template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
|
||||||
|
Vector<T> m(a.size());
|
||||||
|
for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*********************************
|
||||||
|
* Matrices *
|
||||||
|
*********************************
|
||||||
|
**/
|
||||||
|
|
||||||
|
template<class T> void Fill(Matrix<T> & mat, T&val) {
|
||||||
|
int N,M;
|
||||||
|
Size(mat,N,M);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
for(int j=0;j<M;j++){
|
||||||
|
mat[i][j] = val;
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Transpose of a matrix **/
|
||||||
|
Matrix<T> Transpose(Matrix<T> & mat){
|
||||||
|
int N,M;
|
||||||
|
Size(mat,N,M);
|
||||||
|
Matrix C; Resize(C,M,N);
|
||||||
|
for(int i=0;i<M;i++){
|
||||||
|
for(int j=0;j<N;j++){
|
||||||
|
C[i][j] = mat[j][i];
|
||||||
|
}}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
/** Set Matrix to unit matrix **/
|
||||||
|
template<class T> void Unity(Matrix<T> &mat){
|
||||||
|
int N; SizeSquare(mat,N);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
for(int j=0;j<N;j++){
|
||||||
|
if ( i==j ) A[i][j] = 1;
|
||||||
|
else A[i][j] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/** Add C * I to matrix **/
|
||||||
|
template<class T>
|
||||||
|
void PlusUnit(Matrix<T> & A,T c){
|
||||||
|
int dim; SizeSquare(A,dim);
|
||||||
|
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** return the Hermitian conjugate of matrix **/
|
||||||
|
Matrix<T> HermitianConj(Matrix<T> &mat){
|
||||||
|
|
||||||
|
int dim; SizeSquare(mat,dim);
|
||||||
|
|
||||||
|
Matrix<T> C; Resize(C,dim,dim);
|
||||||
|
|
||||||
|
for(int i=0;i<dim;i++){
|
||||||
|
for(int j=0;j<dim;j++){
|
||||||
|
C[i][j] = conj(mat[j][i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** return diagonal entries as a Vector **/
|
||||||
|
Vector<T> diag(Matrix<T> &A)
|
||||||
|
{
|
||||||
|
int dim; SizeSquare(A,dim);
|
||||||
|
Vector<T> d; Resize(d,dim);
|
||||||
|
|
||||||
|
for(int i=0;i<dim;i++){
|
||||||
|
d[i] = A[i][i];
|
||||||
|
}
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Left multiply by a Vector **/
|
||||||
|
Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
|
||||||
|
{
|
||||||
|
int K,M,N;
|
||||||
|
Size(B,K);
|
||||||
|
Size(A,M,N);
|
||||||
|
assert(K==M);
|
||||||
|
|
||||||
|
Vector<T> C; Resize(C,N);
|
||||||
|
|
||||||
|
for(int j=0;j<N;j++){
|
||||||
|
T sum = 0.0;
|
||||||
|
for(int i=0;i<M;i++){
|
||||||
|
sum += B[i] * A[i][j];
|
||||||
|
}
|
||||||
|
C[j] = sum;
|
||||||
|
}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** return 1/diagonal entries as a Vector **/
|
||||||
|
Vector<T> inv_diag(Matrix<T> & A){
|
||||||
|
int dim; SizeSquare(A,dim);
|
||||||
|
Vector<T> d; Resize(d,dim);
|
||||||
|
for(int i=0;i<dim;i++){
|
||||||
|
d[i] = 1.0/A[i][i];
|
||||||
|
}
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
/** Matrix Addition **/
|
||||||
|
inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
|
||||||
|
{
|
||||||
|
int N,M ; SizeSame(A,B,N,M);
|
||||||
|
Matrix C; Resize(C,N,M);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
for(int j=0;j<M;j++){
|
||||||
|
C[i][j] = A[i][j] + B[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
/** Matrix Subtraction **/
|
||||||
|
inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
|
||||||
|
int N,M ; SizeSame(A,B,N,M);
|
||||||
|
Matrix C; Resize(C,N,M);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
for(int j=0;j<M;j++){
|
||||||
|
C[i][j] = A[i][j] - B[i][j];
|
||||||
|
}}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Matrix scalar multiplication **/
|
||||||
|
inline Matrix<T> operator* (Matrix<T> & A,T c){
|
||||||
|
int N,M; Size(A,N,M);
|
||||||
|
Matrix C; Resize(C,N,M);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
for(int j=0;j<M;j++){
|
||||||
|
C[i][j] = A[i][j]*c;
|
||||||
|
}}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
/** Matrix Matrix multiplication **/
|
||||||
|
inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
|
||||||
|
int K,L,N,M;
|
||||||
|
Size(A,K,L);
|
||||||
|
Size(B,N,M); assert(L==N);
|
||||||
|
Matrix C; Resize(C,K,M);
|
||||||
|
|
||||||
|
for(int i=0;i<K;i++){
|
||||||
|
for(int j=0;j<M;j++){
|
||||||
|
T sum = 0.0;
|
||||||
|
for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
|
||||||
|
C[i][j] =sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
/** Matrix Vector multiplication **/
|
||||||
|
inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
|
||||||
|
int M,N,K;
|
||||||
|
Size(A,N,M);
|
||||||
|
Size(B,K); assert(K==M);
|
||||||
|
Vector<T> C; Resize(C,N);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
T sum = 0.0;
|
||||||
|
for(int j=0;j<M;j++) sum += A[i][j]*B[j];
|
||||||
|
C[i] = sum;
|
||||||
|
}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Some version of Matrix norm **/
|
||||||
|
/*
|
||||||
|
inline T Norm(){ // this is not a usual L2 norm
|
||||||
|
T norm = 0;
|
||||||
|
for(int i=0;i<dim;i++){
|
||||||
|
for(int j=0;j<dim;j++){
|
||||||
|
norm += abs(A[i][j]);
|
||||||
|
}}
|
||||||
|
return norm;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Some version of Matrix norm **/
|
||||||
|
template<class T> T LargestDiag(Matrix<T> &A)
|
||||||
|
{
|
||||||
|
int dim ; SizeSquare(A,dim);
|
||||||
|
|
||||||
|
T ld = abs(A[0][0]);
|
||||||
|
for(int i=1;i<dim;i++){
|
||||||
|
T cf = abs(A[i][i]);
|
||||||
|
if(abs(cf) > abs(ld) ){ld = cf;}
|
||||||
|
}
|
||||||
|
return ld;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
|
||||||
|
template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
|
||||||
|
{
|
||||||
|
int dim; SizeSquare(A,dim);
|
||||||
|
for(int l = dim - 1 - offset; l >= 1; l--) {
|
||||||
|
if((U)abs(A[l][l - 1]) < (U)small) {
|
||||||
|
A[l][l-1]=(U)0.0;
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
|
||||||
|
template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)
|
||||||
|
{
|
||||||
|
int dim; SizeSquare(A,dim);
|
||||||
|
for(int l = dim - 1 - offset; l >= 1; l--) {
|
||||||
|
if((U)abs(A[l][l - 1]) < (U)small) {
|
||||||
|
A[l][l - 1] = (U)0.0;
|
||||||
|
A[l - 1][l] = (U)0.0;
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/**Assign a submatrix to a larger one**/
|
||||||
|
template<class T>
|
||||||
|
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
|
||||||
|
{
|
||||||
|
for(int i = row_st; i<row_end; i++){
|
||||||
|
for(int j = col_st; j<col_end; j++){
|
||||||
|
A[i][j] = S[i - row_st][j - col_st];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**Get a square submatrix**/
|
||||||
|
template <class T>
|
||||||
|
Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
|
||||||
|
{
|
||||||
|
Matrix<T> H; Resize(row_end - row_st,col_end-col_st);
|
||||||
|
|
||||||
|
for(int i = row_st; i<row_end; i++){
|
||||||
|
for(int j = col_st; j<col_end; j++){
|
||||||
|
H[i-row_st][j-col_st]=A[i][j];
|
||||||
|
}}
|
||||||
|
return H;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: a second definition of
//   template<class T> void AssignSubMtx(Matrix<T>&, int, int, int, int, Matrix<T>&)
// used to live here. It had the same signature and the same body as the
// AssignSubMtx defined earlier in this header, so it was a redefinition and
// has been removed. Its original remark is kept for reference:
// "NB remember Vector Vectors are transposes of the matrices they represent".
|
||||||
|
|
||||||
|
/** compute b_i A_ij b_j **/ // surprised no Conj
|
||||||
|
template<class T> T proj(Matrix<T> A, Vector<T> B){
|
||||||
|
int dim; SizeSquare(A,dim);
|
||||||
|
int dimB; Size(B,dimB);
|
||||||
|
assert(dimB==dim);
|
||||||
|
T C = 0;
|
||||||
|
for(int i=0;i<dim;i++){
|
||||||
|
T sum = 0.0;
|
||||||
|
for(int j=0;j<dim;j++){
|
||||||
|
sum += A[i][j]*B[j];
|
||||||
|
}
|
||||||
|
C += B[i]*sum; // No conj?
|
||||||
|
}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*************************************************************
|
||||||
|
*
|
||||||
|
* Matrix Vector products
|
||||||
|
*
|
||||||
|
*************************************************************
|
||||||
|
*/
|
||||||
|
// Instead make a linop and call my CG;
|
||||||
|
|
||||||
|
/// q -> q Q
|
||||||
|
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
|
||||||
|
{
|
||||||
|
int M; SizeSquare(Q,M);
|
||||||
|
int N; Size(q,N);
|
||||||
|
assert(M==N);
|
||||||
|
|
||||||
|
times(q,Q,N);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// q -> q Q
|
||||||
|
template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N)
|
||||||
|
{
|
||||||
|
GridBase *grid = q[0]._grid;
|
||||||
|
int M; SizeSquare(Q,M);
|
||||||
|
int K; Size(q,K);
|
||||||
|
assert(N<M);
|
||||||
|
assert(N<K);
|
||||||
|
Vector<Fermion> S(N,grid );
|
||||||
|
for(int j=0;j<N;j++){
|
||||||
|
S[j] = zero;
|
||||||
|
for(int k=0;k<N;k++){
|
||||||
|
S[j] = S[j] + q[k]* Q[k][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(int j=0;j<q.size();j++){
|
||||||
|
q[j] = S[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
48
lib/algorithms/iterative/MatrixUtils.h
Normal file
48
lib/algorithms/iterative/MatrixUtils.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#ifndef GRID_MATRIX_UTILS_H
|
||||||
|
#define GRID_MATRIX_UTILS_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
namespace MatrixUtils {
|
||||||
|
|
||||||
|
template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
|
||||||
|
N=A.size(); assert(N>0);
|
||||||
|
M=A[0].size();
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
assert(A[i].size()==M);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
|
||||||
|
{
|
||||||
|
int M;
|
||||||
|
Size(A,N,M);
|
||||||
|
assert(N==M);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> inline void Fill(Matrix<T>& A,T & val)
|
||||||
|
{
|
||||||
|
int N,M;
|
||||||
|
Size(A,N,M);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
for(int j=0;j<M;j++){
|
||||||
|
A[i][j]=val;
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
template<class T> inline void Diagonal(Matrix<T>& A,T & val)
|
||||||
|
{
|
||||||
|
int N;
|
||||||
|
SizeSquare(A,N);
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
A[i][i]=val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class T> inline void Identity(Matrix<T>& A)
|
||||||
|
{
|
||||||
|
Fill(A,0.0);
|
||||||
|
Diagonal(A,1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
92
lib/algorithms/iterative/PrecConjugateResidual.h
Normal file
92
lib/algorithms/iterative/PrecConjugateResidual.h
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||||
|
#define GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Base classes for iterative processes based on operators
|
||||||
|
// single input vec, single output vec.
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class PrecConjugateResidual : public OperatorFunction<Field> {
|
||||||
|
public:
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxIterations;
|
||||||
|
int verbose;
|
||||||
|
LinearFunction<Field> &Preconditioner;
|
||||||
|
|
||||||
|
PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec) : Tolerance(tol), MaxIterations(maxit), Preconditioner(Prec)
|
||||||
|
{
|
||||||
|
verbose=1;
|
||||||
|
};
|
||||||
|
|
||||||
|
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||||
|
|
||||||
|
RealD a, b, c, d;
|
||||||
|
RealD cp, ssq,rsq;
|
||||||
|
|
||||||
|
RealD rAr, rAAr, rArp;
|
||||||
|
RealD pAp, pAAp;
|
||||||
|
|
||||||
|
GridBase *grid = src._grid;
|
||||||
|
Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);
|
||||||
|
|
||||||
|
psi=zero;
|
||||||
|
r = src;
|
||||||
|
Preconditioner(r,p);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||||
|
Ar=Ap;
|
||||||
|
rAr=pAp;
|
||||||
|
rAAr=pAAp;
|
||||||
|
|
||||||
|
cp =norm2(r);
|
||||||
|
ssq=norm2(src);
|
||||||
|
rsq=Tolerance*Tolerance*ssq;
|
||||||
|
|
||||||
|
if (verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||||
|
|
||||||
|
for(int k=0;k<MaxIterations;k++){
|
||||||
|
|
||||||
|
|
||||||
|
Preconditioner(Ap,z);
|
||||||
|
RealD rq= real(innerProduct(Ap,z));
|
||||||
|
|
||||||
|
a = rAr/rq;
|
||||||
|
|
||||||
|
axpy(psi,a,p,psi);
|
||||||
|
cp = axpy_norm(r,-a,z,r);
|
||||||
|
|
||||||
|
rArp=rAr;
|
||||||
|
|
||||||
|
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||||
|
|
||||||
|
b =rAr/rArp;
|
||||||
|
|
||||||
|
axpy(p,b,p,r);
|
||||||
|
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||||
|
|
||||||
|
if(verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||||
|
|
||||||
|
if(cp<rsq) {
|
||||||
|
Linop.HermOp(psi,Ap);
|
||||||
|
axpy(r,-1.0,src,Ap);
|
||||||
|
RealD true_resid = norm2(r)/ssq;
|
||||||
|
std::cout<<GridLogMessage<<"PrecConjugateResidual: Converged on iteration " <<k
|
||||||
|
<< " computed residual "<<sqrt(cp/ssq)
|
||||||
|
<< " true residual "<<sqrt(true_resid)
|
||||||
|
<< " target " <<Tolerance <<std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"PrecConjugateResidual did NOT converge"<<std::endl;
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
175
lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
Normal file
175
lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
#ifndef GRID_PREC_GCR_H
|
||||||
|
#define GRID_PREC_GCR_H
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//VPGCR Abe and Zhang, 2005.
|
||||||
|
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
|
||||||
|
//Computing and Information Volume 2, Number 2, Pages 147-161
|
||||||
|
//NB. Likely not original reference since they are focussing on a preconditioner variant.
|
||||||
|
// but VPGCR was nicely written up in their paper
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
|
||||||
|
public:
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxIterations;
|
||||||
|
int verbose;
|
||||||
|
int mmax;
|
||||||
|
int nstep;
|
||||||
|
int steps;
|
||||||
|
LinearFunction<Field> &Preconditioner;
|
||||||
|
|
||||||
|
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||||
|
Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
Preconditioner(Prec),
|
||||||
|
mmax(_mmax),
|
||||||
|
nstep(_nstep)
|
||||||
|
{
|
||||||
|
verbose=1;
|
||||||
|
};
|
||||||
|
|
||||||
|
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||||
|
|
||||||
|
psi=zero;
|
||||||
|
RealD cp, ssq,rsq;
|
||||||
|
ssq=norm2(src);
|
||||||
|
rsq=Tolerance*Tolerance*ssq;
|
||||||
|
|
||||||
|
Field r(src._grid);
|
||||||
|
|
||||||
|
steps=0;
|
||||||
|
for(int k=0;k<MaxIterations;k++){
|
||||||
|
|
||||||
|
cp=GCRnStep(Linop,src,psi,rsq);
|
||||||
|
|
||||||
|
if ( verbose ) std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
|
||||||
|
|
||||||
|
if(cp<rsq) {
|
||||||
|
Linop.HermOp(psi,r);
|
||||||
|
axpy(r,-1.0,src,r);
|
||||||
|
RealD tr = norm2(r);
|
||||||
|
std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
|
||||||
|
<< " computed residual "<<sqrt(cp/ssq)
|
||||||
|
<< " true residual " <<sqrt(tr/ssq)
|
||||||
|
<< " target " <<Tolerance <<std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
|
||||||
|
|
||||||
|
RealD cp;
|
||||||
|
RealD a, b, c, d;
|
||||||
|
RealD zAz, zAAz;
|
||||||
|
RealD rAq, rq;
|
||||||
|
|
||||||
|
GridBase *grid = src._grid;
|
||||||
|
|
||||||
|
Field r(grid);
|
||||||
|
Field z(grid);
|
||||||
|
Field tmp(grid);
|
||||||
|
Field ttmp(grid);
|
||||||
|
Field Az(grid);
|
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// history for flexible orthog
|
||||||
|
////////////////////////////////
|
||||||
|
std::vector<Field> q(mmax,grid);
|
||||||
|
std::vector<Field> p(mmax,grid);
|
||||||
|
std::vector<RealD> qq(mmax);
|
||||||
|
|
||||||
|
//////////////////////////////////
|
||||||
|
// initial guess x0 is taken as nonzero.
|
||||||
|
// r0=src-A x0 = src
|
||||||
|
//////////////////////////////////
|
||||||
|
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
|
||||||
|
r=src-Az;
|
||||||
|
|
||||||
|
/////////////////////
|
||||||
|
// p = Prec(r)
|
||||||
|
/////////////////////
|
||||||
|
Preconditioner(r,z);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< " Preconditioner in " << norm2(r)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<< " Preconditioner out " << norm2(z)<<std::endl;
|
||||||
|
|
||||||
|
Linop.HermOp(z,tmp);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< " Preconditioner Aout " << norm2(tmp)<<std::endl;
|
||||||
|
ttmp=tmp;
|
||||||
|
tmp=tmp-r;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< " Preconditioner resid " << std::sqrt(norm2(tmp)/norm2(r))<<std::endl;
|
||||||
|
/*
|
||||||
|
std::cout<<GridLogMessage<<r<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<z<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<ttmp<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<tmp<<std::endl;
|
||||||
|
*/
|
||||||
|
|
||||||
|
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||||
|
|
||||||
|
//p[0],q[0],qq[0]
|
||||||
|
p[0]= z;
|
||||||
|
q[0]= Az;
|
||||||
|
qq[0]= zAAz;
|
||||||
|
|
||||||
|
cp =norm2(r);
|
||||||
|
|
||||||
|
for(int k=0;k<nstep;k++){
|
||||||
|
|
||||||
|
steps++;
|
||||||
|
|
||||||
|
int kp = k+1;
|
||||||
|
int peri_k = k %mmax;
|
||||||
|
int peri_kp= kp%mmax;
|
||||||
|
|
||||||
|
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
|
||||||
|
a = rq/qq[peri_k];
|
||||||
|
|
||||||
|
axpy(psi,a,p[peri_k],psi);
|
||||||
|
|
||||||
|
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< " VPGCR_step resid" <<sqrt(cp/rsq)<<std::endl;
|
||||||
|
if((k==nstep-1)||(cp<rsq)){
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Preconditioner(r,z);// solve Az = r
|
||||||
|
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||||
|
|
||||||
|
|
||||||
|
Linop.HermOp(z,tmp);
|
||||||
|
tmp=tmp-r;
|
||||||
|
std::cout<<GridLogMessage<< " Preconditioner resid" <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
|
||||||
|
|
||||||
|
q[peri_kp]=Az;
|
||||||
|
p[peri_kp]=z;
|
||||||
|
|
||||||
|
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
|
||||||
|
for(int back=0;back<northog;back++){
|
||||||
|
|
||||||
|
int peri_back=(k-back)%mmax; assert((k-back)>=0);
|
||||||
|
|
||||||
|
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
|
||||||
|
p[peri_kp]=p[peri_kp]+b*p[peri_back];
|
||||||
|
q[peri_kp]=q[peri_kp]+b*q[peri_back];
|
||||||
|
|
||||||
|
}
|
||||||
|
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
assert(0); // never reached
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@ -89,7 +89,7 @@ namespace Grid {
|
|||||||
//////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////
|
||||||
// Call the red-black solver
|
// Call the red-black solver
|
||||||
//////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////
|
||||||
std::cout << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||||
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||||
|
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
@ -108,7 +108,7 @@ namespace Grid {
|
|||||||
RealD ns = norm2(in);
|
RealD ns = norm2(in);
|
||||||
RealD nr = norm2(resid);
|
RealD nr = norm2(resid);
|
||||||
|
|
||||||
std::cout << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
122
lib/algorithms/iterative/bisec.c
Normal file
122
lib/algorithms/iterative/bisec.c
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
#include <math.h>
#include <stdlib.h>

#include <algorithm>
#include <vector>
|
||||||
|
|
||||||
|
struct Bisection {
|
||||||
|
|
||||||
|
static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
std::vector<RealD> evec1(row_num+3);
|
||||||
|
std::vector<RealD> evec2(row_num+3);
|
||||||
|
RealD eps2;
|
||||||
|
ALPHA[1]=0.;
|
||||||
|
BETHA[1]=0.;
|
||||||
|
for(i=0;i<row_num-1;i++) {
|
||||||
|
ALPHA[i+1] = A[i*(row_num+1)].real();
|
||||||
|
BETHA[i+2] = A[i*(row_num+1)+1].real();
|
||||||
|
}
|
||||||
|
ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
|
||||||
|
bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
|
||||||
|
bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
|
||||||
|
|
||||||
|
// Do we really need to sort here?
|
||||||
|
int begin=1;
|
||||||
|
int end = row_num;
|
||||||
|
int swapped=1;
|
||||||
|
while(swapped) {
|
||||||
|
swapped=0;
|
||||||
|
for(i=begin;i<end;i++){
|
||||||
|
if(mag(evec2[i])>mag(evec2[i+1])) {
|
||||||
|
swap(evec2+i,evec2+i+1);
|
||||||
|
swapped=1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
end--;
|
||||||
|
for(i=end-1;i>=begin;i--){
|
||||||
|
if(mag(evec2[i])>mag(evec2[i+1])) {
|
||||||
|
swap(evec2+i,evec2+i+1);
|
||||||
|
swapped=1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
begin++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i=0;i<row_num;i++){
|
||||||
|
for(j=0;j<row_num;j++) {
|
||||||
|
if(i==j) H[i*row_num+j]=evec2[i+1];
|
||||||
|
else H[i*row_num+j]=0.;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bisec(std::vector<RealD> &c,
|
||||||
|
std::vector<RealD> &b,
|
||||||
|
int n,
|
||||||
|
int m1,
|
||||||
|
int m2,
|
||||||
|
RealD eps1,
|
||||||
|
RealD relfeh,
|
||||||
|
std::vector<RealD> &x,
|
||||||
|
RealD &eps2)
|
||||||
|
{
|
||||||
|
std::vector<RealD> wu(n+2);
|
||||||
|
|
||||||
|
RealD h,q,x1,xu,x0,xmin,xmax;
|
||||||
|
int i,a,k;
|
||||||
|
|
||||||
|
b[1]=0.0;
|
||||||
|
xmin=c[n]-fabs(b[n]);
|
||||||
|
xmax=c[n]+fabs(b[n]);
|
||||||
|
for(i=1;i<n;i++){
|
||||||
|
h=fabs(b[i])+fabs(b[i+1]);
|
||||||
|
if(c[i]+h>xmax) xmax= c[i]+h;
|
||||||
|
if(c[i]-h<xmin) xmin= c[i]-h;
|
||||||
|
}
|
||||||
|
xmax *=2.;
|
||||||
|
|
||||||
|
eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
|
||||||
|
if(eps1<=0.0) eps1=eps2;
|
||||||
|
eps2=0.5*eps1+7.0*(eps2);
|
||||||
|
x0=xmax;
|
||||||
|
for(i=m1;i<=m2;i++){
|
||||||
|
x[i]=xmax;
|
||||||
|
wu[i]=xmin;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(k=m2;k>=m1;k--){
|
||||||
|
xu=xmin;
|
||||||
|
i=k;
|
||||||
|
do{
|
||||||
|
if(xu<wu[i]){
|
||||||
|
xu=wu[i];
|
||||||
|
i=m1-1;
|
||||||
|
}
|
||||||
|
i--;
|
||||||
|
}while(i>=m1);
|
||||||
|
if(x0>x[k]) x0=x[k];
|
||||||
|
while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
|
||||||
|
x1=(xu+x0)/2;
|
||||||
|
|
||||||
|
a=0;
|
||||||
|
q=1.0;
|
||||||
|
for(i=1;i<=n;i++){
|
||||||
|
q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
|
||||||
|
if(q<0) a++;
|
||||||
|
}
|
||||||
|
// printf("x1=%e a=%d\n",x1,a);
|
||||||
|
if(a<k){
|
||||||
|
if(a<m1){
|
||||||
|
xu=x1;
|
||||||
|
wu[m1]=x1;
|
||||||
|
}else {
|
||||||
|
xu=x1;
|
||||||
|
wu[a+1]=x1;
|
||||||
|
if(x[a]>x1) x[a]=x1;
|
||||||
|
}
|
||||||
|
}else x0=x1;
|
||||||
|
}
|
||||||
|
x[k]=(x0+xu)/2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
1
lib/algorithms/iterative/get_eig.c
Normal file
1
lib/algorithms/iterative/get_eig.c
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
@ -87,6 +87,14 @@ class CartesianCommunicator {
|
|||||||
void *recv,
|
void *recv,
|
||||||
int recv_from_rank,
|
int recv_from_rank,
|
||||||
int bytes);
|
int bytes);
|
||||||
|
|
||||||
|
void RecvFrom(void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
void SendTo(void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
void *xmit,
|
void *xmit,
|
||||||
int xmit_to_rank,
|
int xmit_to_rank,
|
||||||
|
@ -81,13 +81,30 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
|||||||
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||||
SendToRecvFromComplete(reqs);
|
SendToRecvFromComplete(reqs);
|
||||||
}
|
}
|
||||||
|
void CartesianCommunicator::RecvFrom(void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Status stat;
|
||||||
|
int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::SendTo(void *xmit,
|
||||||
|
int dest,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
int rank = _processor; // used for tag; must know who it comes from
|
||||||
|
int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
// Basic Halo comms primitive
|
// Basic Halo comms primitive
|
||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
void *xmit,
|
void *xmit,
|
||||||
int dest,
|
int dest,
|
||||||
void *recv,
|
void *recv,
|
||||||
int from,
|
int from,
|
||||||
int bytes)
|
int bytes)
|
||||||
{
|
{
|
||||||
MPI_Request xrq;
|
MPI_Request xrq;
|
||||||
MPI_Request rrq;
|
MPI_Request rrq;
|
||||||
@ -100,7 +117,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
|||||||
|
|
||||||
list.push_back(xrq);
|
list.push_back(xrq);
|
||||||
list.push_back(rrq);
|
list.push_back(rrq);
|
||||||
|
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
{
|
{
|
||||||
|
@ -22,6 +22,20 @@ void CartesianCommunicator::GlobalSum(double &){}
|
|||||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
||||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
||||||
|
|
||||||
|
// Stub for this communicator variant: a point-to-point receive is
// never expected here, so any call trips the assertion.
void CartesianCommunicator::RecvFrom(void *recv, int recv_from_rank, int bytes)
{
  assert(0);
}
|
||||||
|
// Stub for this communicator variant: a point-to-point send is never
// expected here, so any call trips the assertion.
void CartesianCommunicator::SendTo(void *xmit, int xmit_to_rank, int bytes)
{
  assert(0);
}
|
||||||
|
|
||||||
|
|
||||||
// Basic Halo comms primitive -- should never call in single node
|
// Basic Halo comms primitive -- should never call in single node
|
||||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||||
int dest,
|
int dest,
|
||||||
|
@ -8,7 +8,7 @@ class SimpleCompressor {
|
|||||||
public:
|
public:
|
||||||
void Point(int) {};
|
void Point(int) {};
|
||||||
|
|
||||||
vobj operator() (const vobj &arg) {
|
vobj operator() (const vobj &arg,int dimension,int plane,int osite,GridBase *grid) {
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -24,21 +24,32 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
|||||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||||
cbmask = 0x3;
|
cbmask = 0x3;
|
||||||
}
|
}
|
||||||
|
|
||||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
if ( cbmask == 0x3 ) {
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*rhs._grid->_slice_stride[dimension];
|
int o = n*rhs._grid->_slice_stride[dimension];
|
||||||
int bo = n*rhs._grid->_slice_block[dimension];
|
int bo = n*rhs._grid->_slice_block[dimension];
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
||||||
if ( ocb &cbmask ) {
|
|
||||||
buffer[bo+b]=compress(rhs._odata[so+o+b]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
int bo=0;
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o = n*rhs._grid->_slice_stride[dimension];
|
||||||
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||||
|
if ( ocb &cbmask ) {
|
||||||
|
buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,18 +70,33 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
if ( cbmask ==0x3){
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o=n*rhs._grid->_slice_stride[dimension];
|
int o=n*rhs._grid->_slice_stride[dimension];
|
||||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
||||||
if ( ocb & cbmask ) {
|
|
||||||
cobj temp;
|
|
||||||
temp =compress(rhs._odata[so+o+b]);
|
|
||||||
extract<cobj>(temp,pointers,offset);
|
extract<cobj>(temp,pointers,offset);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
assert(0); //Fixme think this is buggy
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o=n*rhs._grid->_slice_stride[dimension];
|
||||||
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||||
|
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
if ( ocb & cbmask ) {
|
||||||
|
cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
||||||
|
extract<cobj>(temp,pointers,offset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -109,16 +135,28 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
if ( cbmask ==0x3 ) {
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*rhs._grid->_slice_stride[dimension];
|
int o =n*rhs._grid->_slice_stride[dimension];
|
||||||
int bo =n*rhs._grid->_slice_block[dimension];
|
int bo =n*rhs._grid->_slice_block[dimension];
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
|
||||||
if ( ocb & cbmask ) {
|
|
||||||
rhs._odata[so+o+b]=buffer[bo+b];
|
rhs._odata[so+o+b]=buffer[bo+b];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
int bo=0;
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o =n*rhs._grid->_slice_stride[dimension];
|
||||||
|
int bo =n*rhs._grid->_slice_block[dimension];
|
||||||
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||||
|
if ( ocb & cbmask ) {
|
||||||
|
rhs._odata[so+o+b]=buffer[bo++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,16 +175,28 @@ PARALLEL_NESTED_LOOP2
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
if(cbmask ==0x3 ) {
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*rhs._grid->_slice_stride[dimension];
|
int o = n*rhs._grid->_slice_stride[dimension];
|
||||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
|
||||||
if ( ocb&cbmask ) {
|
|
||||||
merge(rhs._odata[so+o+b],pointers,offset);
|
merge(rhs._odata[so+o+b],pointers,offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
assert(0); // think this is buggy FIXME
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o = n*rhs._grid->_slice_stride[dimension];
|
||||||
|
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||||
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||||
|
if ( ocb&cbmask ) {
|
||||||
|
merge(rhs._odata[so+o+b],pointers,offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -166,17 +216,29 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
if(cbmask == 0x3 ){
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*rhs._grid->_slice_stride[dimension]+b;
|
int o =n*rhs._grid->_slice_stride[dimension]+b;
|
||||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||||
if ( ocb&cbmask ) {
|
|
||||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
|
||||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PARALLEL_NESTED_LOOP2
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
|
int o =n*rhs._grid->_slice_stride[dimension]+b;
|
||||||
|
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||||
|
if ( ocb&cbmask ) {
|
||||||
|
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||||
|
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
|||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
|
||||||
Lattice<vobj> ret(rhs._grid);
|
Lattice<vobj> ret(rhs._grid);
|
||||||
|
|
||||||
int fd = rhs._grid->_fdimensions[dimension];
|
int fd = rhs._grid->_fdimensions[dimension];
|
||||||
int rd = rhs._grid->_rdimensions[dimension];
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
@ -26,10 +26,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
|
|||||||
|
|
||||||
|
|
||||||
if ( !comm_dim ) {
|
if ( !comm_dim ) {
|
||||||
|
// std::cout << "Cshift_local" <<std::endl;
|
||||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||||
} else if ( splice_dim ) {
|
} else if ( splice_dim ) {
|
||||||
|
// std::cout << "Cshift_comms_simd" <<std::endl;
|
||||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||||
} else {
|
} else {
|
||||||
|
// std::cout << "Cshift_comms" <<std::endl;
|
||||||
Cshift_comms(ret,rhs,dimension,shift);
|
Cshift_comms(ret,rhs,dimension,shift);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@ -42,9 +45,13 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
|
|||||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||||
|
|
||||||
|
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||||
|
|
||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
|
// std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||||
Cshift_comms(ret,rhs,dimension,shift,0x3);
|
Cshift_comms(ret,rhs,dimension,shift,0x3);
|
||||||
} else {
|
} else {
|
||||||
|
// std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||||
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||||
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||||
}
|
}
|
||||||
@ -113,12 +120,16 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
int xmit_to_rank;
|
int xmit_to_rank;
|
||||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
|
|
||||||
grid->SendToRecvFrom((void *)&send_buf[0],
|
grid->SendToRecvFrom((void *)&send_buf[0],
|
||||||
xmit_to_rank,
|
xmit_to_rank,
|
||||||
(void *)&recv_buf[0],
|
(void *)&recv_buf[0],
|
||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
|
|
||||||
|
// for(int i=0;i<words;i++){
|
||||||
|
// std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
|
||||||
|
// }
|
||||||
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
|
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -132,18 +132,18 @@ inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
|
|||||||
assert(cb==lat.checkerboard);
|
assert(cb==lat.checkerboard);
|
||||||
}
|
}
|
||||||
cb=lat.checkerboard;
|
cb=lat.checkerboard;
|
||||||
// std::cout<<"Lattice leaf cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
||||||
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
|
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
|
||||||
{
|
{
|
||||||
// std::cout<<"Non lattice leaf cb"<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
|
||||||
{
|
{
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
||||||
// std::cout<<"Unary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
@ -151,7 +151,7 @@ inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &ex
|
|||||||
{
|
{
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb,std::get<1>(expr.second));
|
||||||
// std::cout<<"Binary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
||||||
@ -159,7 +159,7 @@ inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3
|
|||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb,std::get<1>(expr.second));
|
||||||
CBFromExpression(cb,std::get<2>(expr.second));
|
CBFromExpression(cb,std::get<2>(expr.second));
|
||||||
// std::cout<<"Trinary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
@ -178,6 +178,7 @@ GridUnopClass(UnaryConj,conjugate(a));
|
|||||||
GridUnopClass(UnaryTrace,trace(a));
|
GridUnopClass(UnaryTrace,trace(a));
|
||||||
GridUnopClass(UnaryTranspose,transpose(a));
|
GridUnopClass(UnaryTranspose,transpose(a));
|
||||||
GridUnopClass(UnaryTa,Ta(a));
|
GridUnopClass(UnaryTa,Ta(a));
|
||||||
|
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
|
||||||
GridUnopClass(UnaryReal,real(a));
|
GridUnopClass(UnaryReal,real(a));
|
||||||
GridUnopClass(UnaryImag,imag(a));
|
GridUnopClass(UnaryImag,imag(a));
|
||||||
GridUnopClass(UnaryToReal,toReal(a));
|
GridUnopClass(UnaryToReal,toReal(a));
|
||||||
@ -290,13 +291,14 @@ GRID_DEF_UNOP(conjugate,UnaryConj);
|
|||||||
GRID_DEF_UNOP(trace,UnaryTrace);
|
GRID_DEF_UNOP(trace,UnaryTrace);
|
||||||
GRID_DEF_UNOP(transpose,UnaryTranspose);
|
GRID_DEF_UNOP(transpose,UnaryTranspose);
|
||||||
GRID_DEF_UNOP(Ta,UnaryTa);
|
GRID_DEF_UNOP(Ta,UnaryTa);
|
||||||
|
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
|
||||||
GRID_DEF_UNOP(real,UnaryReal);
|
GRID_DEF_UNOP(real,UnaryReal);
|
||||||
GRID_DEF_UNOP(imag,UnaryImag);
|
GRID_DEF_UNOP(imag,UnaryImag);
|
||||||
GRID_DEF_UNOP(toReal,UnaryToReal);
|
GRID_DEF_UNOP(toReal,UnaryToReal);
|
||||||
GRID_DEF_UNOP(toComplex,UnaryToComplex);
|
GRID_DEF_UNOP(toComplex,UnaryToComplex);
|
||||||
GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
|
GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
|
||||||
GRID_DEF_UNOP(sqrt ,UnarySqrt);
|
GRID_DEF_UNOP(sqrt ,UnarySqrt);
|
||||||
GRID_DEF_UNOP(rsqrt,UnarySqrt);
|
GRID_DEF_UNOP(rsqrt,UnaryRsqrt);
|
||||||
GRID_DEF_UNOP(sin ,UnarySin);
|
GRID_DEF_UNOP(sin ,UnarySin);
|
||||||
GRID_DEF_UNOP(cos ,UnaryCos);
|
GRID_DEF_UNOP(cos ,UnaryCos);
|
||||||
GRID_DEF_UNOP(log ,UnaryLog);
|
GRID_DEF_UNOP(log ,UnaryLog);
|
||||||
@ -370,7 +372,7 @@ using namespace Grid;
|
|||||||
tmp.func(eval(0,v1),eval(0,v2));
|
tmp.func(eval(0,v1),eval(0,v2));
|
||||||
|
|
||||||
auto var = v1+v2;
|
auto var = v1+v2;
|
||||||
std::cout<<typeid(var).name()<<std::endl;
|
std::cout<<GridLogMessage<<typeid(var).name()<<std::endl;
|
||||||
|
|
||||||
v3=v1+v2;
|
v3=v1+v2;
|
||||||
v3=v1+v2+v1*v2;
|
v3=v1+v2+v1*v2;
|
||||||
|
@ -29,6 +29,9 @@ extern int GridCshiftPermuteMap[4][16];
|
|||||||
class LatticeBase {};
|
class LatticeBase {};
|
||||||
class LatticeExpressionBase {};
|
class LatticeExpressionBase {};
|
||||||
|
|
||||||
|
template<class T> using Vector = std::vector<T,alignedAllocator<T> >; // Aligned allocator??
|
||||||
|
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
|
||||||
|
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
||||||
public:
|
public:
|
||||||
@ -59,7 +62,12 @@ public:
|
|||||||
|
|
||||||
GridBase *_grid;
|
GridBase *_grid;
|
||||||
int checkerboard;
|
int checkerboard;
|
||||||
std::vector<vobj,alignedAllocator<vobj> > _odata;
|
Vector<vobj> _odata;
|
||||||
|
|
||||||
|
// to pthread need a computable loop where loop induction is not required
|
||||||
|
int begin(void) { return 0;};
|
||||||
|
int end(void) { return _odata.size(); }
|
||||||
|
vobj & operator[](int i) { return _odata[i]; };
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
@ -204,9 +212,10 @@ PARALLEL_FOR_LOOP
|
|||||||
// Constructor requires "grid" passed.
|
// Constructor requires "grid" passed.
|
||||||
// what about a default grid?
|
// what about a default grid?
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
|
Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
|
||||||
// _odata.reserve(_grid->oSites());
|
// _odata.reserve(_grid->oSites());
|
||||||
// _odata.resize(_grid->oSites());
|
// _odata.resize(_grid->oSites());
|
||||||
|
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
|
||||||
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
||||||
checkerboard=0;
|
checkerboard=0;
|
||||||
}
|
}
|
||||||
@ -221,7 +230,7 @@ PARALLEL_FOR_LOOP
|
|||||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||||
this->checkerboard = r.checkerboard;
|
this->checkerboard = r.checkerboard;
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
std::cout<<"Lattice operator ="<<std::endl;
|
std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
this->_odata[ss]=r._odata[ss];
|
this->_odata[ss]=r._odata[ss];
|
||||||
|
@ -10,20 +10,11 @@ namespace Grid {
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Peek internal indices of a Lattice object
|
// Peek internal indices of a Lattice object
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<int Index,class vobj>
|
|
||||||
auto peekIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
|
|
||||||
{
|
|
||||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
|
||||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
};
|
|
||||||
template<int Index,class vobj>
|
template<int Index,class vobj>
|
||||||
auto peekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
|
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
|
||||||
{
|
{
|
||||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
|
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
|
||||||
|
ret.checkerboard=lhs.checkerboard;
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
||||||
@ -31,9 +22,10 @@ PARALLEL_FOR_LOOP
|
|||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
template<int Index,class vobj>
|
template<int Index,class vobj>
|
||||||
auto peekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
||||||
{
|
{
|
||||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
|
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
|
||||||
|
ret.checkerboard=lhs.checkerboard;
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
||||||
@ -44,16 +36,8 @@ PARALLEL_FOR_LOOP
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Poke internal indices of a Lattice object
|
// Poke internal indices of a Lattice object
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<int Index,class vobj>
|
|
||||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> & rhs)
|
|
||||||
{
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
|
||||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
template<int Index,class vobj>
|
template<int Index,class vobj>
|
||||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
|
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||||
@ -61,7 +45,7 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<int Index,class vobj>
|
template<int Index,class vobj>
|
||||||
void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
|
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||||
|
@ -125,7 +125,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
|||||||
assert(grid!=NULL);
|
assert(grid!=NULL);
|
||||||
|
|
||||||
// FIXME
|
// FIXME
|
||||||
std::cout<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
|
std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
|
||||||
|
|
||||||
const int Nd = grid->_ndimension;
|
const int Nd = grid->_ndimension;
|
||||||
const int Nsimd = grid->Nsimd();
|
const int Nsimd = grid->Nsimd();
|
||||||
|
@ -5,6 +5,37 @@
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
// Allow the RNG state to be less dense than the fine grid
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
inline int RNGfillable(GridBase *coarse,GridBase *fine)
|
||||||
|
{
|
||||||
|
|
||||||
|
int rngdims = coarse->_ndimension;
|
||||||
|
|
||||||
|
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
|
||||||
|
int lowerdims = fine->_ndimension - coarse->_ndimension;
|
||||||
|
assert(lowerdims >= 0);
|
||||||
|
for(int d=0;d<lowerdims;d++){
|
||||||
|
assert(fine->_simd_layout[d]==1);
|
||||||
|
assert(fine->_processors[d]==1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// local and global volumes subdivide cleanly after SIMDization
|
||||||
|
int multiplicity=1;
|
||||||
|
for(int d=0;d<rngdims;d++){
|
||||||
|
int fd= d+lowerdims;
|
||||||
|
assert(coarse->_processors[d] == fine->_processors[fd]);
|
||||||
|
assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
|
||||||
|
assert((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[fd]);
|
||||||
|
|
||||||
|
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
return multiplicity;
|
||||||
|
}
|
||||||
|
|
||||||
// Wrap seed_seq to give common interface with random_device
|
// Wrap seed_seq to give common interface with random_device
|
||||||
class fixedSeed {
|
class fixedSeed {
|
||||||
public:
|
public:
|
||||||
@ -226,26 +257,32 @@ namespace Grid {
|
|||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
conformable(_grid,l._grid);
|
int multiplicity = RNGfillable(_grid,l._grid);
|
||||||
|
|
||||||
int Nsimd =_grid->Nsimd();
|
int Nsimd =_grid->Nsimd();
|
||||||
int osites=_grid->oSites();
|
int osites=_grid->oSites();
|
||||||
int words=sizeof(scalar_object)/sizeof(scalar_type);
|
int words=sizeof(scalar_object)/sizeof(scalar_type);
|
||||||
|
|
||||||
std::vector<scalar_object> buf(Nsimd);
|
|
||||||
|
|
||||||
for(int ss=0;ss<osites;ss++){
|
|
||||||
for(int si=0;si<Nsimd;si++){
|
|
||||||
|
|
||||||
int gdx = generator_idx(ss,si); // index of generator state
|
PARALLEL_FOR_LOOP
|
||||||
scalar_type *pointer = (scalar_type *)&buf[si];
|
for(int ss=0;ss<osites;ss++){
|
||||||
for(int idx=0;idx<words;idx++){
|
|
||||||
fillScalar(pointer[idx],dist,_generators[gdx]);
|
std::vector<scalar_object> buf(Nsimd);
|
||||||
|
for(int m=0;m<multiplicity;m++) {// Draw from same generator multiplicity times
|
||||||
|
|
||||||
|
int sm=multiplicity*ss+m; // Maps the generator site to the fine site
|
||||||
|
|
||||||
|
for(int si=0;si<Nsimd;si++){
|
||||||
|
int gdx = generator_idx(ss,si); // index of generator state
|
||||||
|
scalar_type *pointer = (scalar_type *)&buf[si];
|
||||||
|
for(int idx=0;idx<words;idx++){
|
||||||
|
fillScalar(pointer[idx],dist,_generators[gdx]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// merge into SIMD lanes
|
||||||
|
merge(l._odata[sm],buf);
|
||||||
}
|
}
|
||||||
// merge into SIMD lanes
|
|
||||||
merge(l._odata[ss],buf);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ PARALLEL_FOR_LOOP
|
|||||||
// Trace Index level dependent operation
|
// Trace Index level dependent operation
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<int Index,class vobj>
|
template<int Index,class vobj>
|
||||||
inline auto traceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
||||||
{
|
{
|
||||||
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
|
@ -17,13 +17,14 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// remove and insert a half checkerboard
|
// remove and insert a half checkerboard
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
|
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
|
||||||
half.checkerboard = cb;
|
half.checkerboard = cb;
|
||||||
int ssh=0;
|
int ssh=0;
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||||
std::vector<int> coor;
|
std::vector<int> coor;
|
||||||
int cbos;
|
int cbos;
|
||||||
@ -40,7 +41,7 @@ PARALLEL_FOR_LOOP
|
|||||||
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
|
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
|
||||||
int cb = half.checkerboard;
|
int cb = half.checkerboard;
|
||||||
int ssh=0;
|
int ssh=0;
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||||
std::vector<int> coor;
|
std::vector<int> coor;
|
||||||
int cbos;
|
int cbos;
|
||||||
@ -158,6 +159,7 @@ template<class vobj,class CComplex>
|
|||||||
|
|
||||||
fine_inner = localInnerProduct(fineX,fineY);
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
blockSum(coarse_inner,fine_inner);
|
blockSum(coarse_inner,fine_inner);
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<coarse->oSites();ss++){
|
for(int ss=0;ss<coarse->oSites();ss++){
|
||||||
CoarseInner._odata[ss] = coarse_inner._odata[ss];
|
CoarseInner._odata[ss] = coarse_inner._odata[ss];
|
||||||
}
|
}
|
||||||
@ -168,7 +170,7 @@ inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
|||||||
GridBase *coarse = ip._grid;
|
GridBase *coarse = ip._grid;
|
||||||
Lattice<vobj> zz(fineX._grid); zz=zero;
|
Lattice<vobj> zz(fineX._grid); zz=zero;
|
||||||
blockInnerProduct(ip,fineX,fineX);
|
blockInnerProduct(ip,fineX,fineX);
|
||||||
ip = rsqrt(ip);
|
ip = pow(ip,-0.5);
|
||||||
blockZAXPY(fineX,ip,fineX,zz);
|
blockZAXPY(fineX,ip,fineX,zz);
|
||||||
}
|
}
|
||||||
// useful in multigrid project;
|
// useful in multigrid project;
|
||||||
@ -297,5 +299,42 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
|
GridBase *cg = coarse._grid;
|
||||||
|
GridBase *fg = fine._grid;
|
||||||
|
|
||||||
|
int nd = cg->_ndimension;
|
||||||
|
|
||||||
|
subdivides(cg,fg);
|
||||||
|
|
||||||
|
assert(cg->_ndimension==fg->_ndimension);
|
||||||
|
|
||||||
|
std::vector<int> ratio(cg->_ndimension);
|
||||||
|
|
||||||
|
for(int d=0;d<cg->_ndimension;d++){
|
||||||
|
ratio[d] = fg->_fdimensions[d]/cg->_fdimensions[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> fcoor(nd);
|
||||||
|
std::vector<int> ccoor(nd);
|
||||||
|
for(int g=0;g<fg->gSites();g++){
|
||||||
|
|
||||||
|
fg->GlobalIndexToGlobalCoor(g,fcoor);
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
ccoor[d] = fcoor[d]%cg->_gdimensions[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
sobj tmp;
|
||||||
|
peekSite(tmp,coarse,ccoor);
|
||||||
|
pokeSite(tmp,fine,fcoor);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -24,7 +24,7 @@ PARALLEL_FOR_LOOP
|
|||||||
// Index level dependent transpose
|
// Index level dependent transpose
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<int Index,class vobj>
|
template<int Index,class vobj>
|
||||||
inline auto transposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
|
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
|
||||||
{
|
{
|
||||||
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
|
@ -24,6 +24,17 @@ PARALLEL_FOR_LOOP
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class obj> Lattice<obj> div(const Lattice<obj> &rhs,Integer y){
|
||||||
|
Lattice<obj> ret(rhs._grid);
|
||||||
|
ret.checkerboard = rhs.checkerboard;
|
||||||
|
conformable(ret,rhs);
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||||
|
ret._odata[ss]=div(rhs._odata[ss],y);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){
|
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){
|
||||||
Lattice<obj> ret(rhs._grid);
|
Lattice<obj> ret(rhs._grid);
|
||||||
ret.checkerboard = rhs.checkerboard;
|
ret.checkerboard = rhs.checkerboard;
|
||||||
|
@ -22,7 +22,6 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
|||||||
typedef typename iobj::vector_type mask_type;
|
typedef typename iobj::vector_type mask_type;
|
||||||
|
|
||||||
const int Nsimd = grid->Nsimd();
|
const int Nsimd = grid->Nsimd();
|
||||||
const int words = sizeof(vobj)/sizeof(vector_type);
|
|
||||||
|
|
||||||
std::vector<Integer> mask(Nsimd);
|
std::vector<Integer> mask(Nsimd);
|
||||||
std::vector<scalar_object> truevals (Nsimd);
|
std::vector<scalar_object> truevals (Nsimd);
|
||||||
|
512
lib/parallelIO/BinaryIO.h
Normal file
512
lib/parallelIO/BinaryIO.h
Normal file
@ -0,0 +1,512 @@
|
|||||||
|
#ifndef GRID_BINARY_IO_H
|
||||||
|
#define GRID_BINARY_IO_H
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_ENDIAN_H
|
||||||
|
#include <endian.h>
|
||||||
|
#endif
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#include <algorithm>
|
||||||
|
// 64bit endian swap is a portability pain
|
||||||
|
#ifndef __has_builtin // Optional of course.
|
||||||
|
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_DECL_BE64TOH
|
||||||
|
#undef Grid_ntohll
|
||||||
|
#define Grid_ntohll be64toh
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_DECL_NTOHLL
|
||||||
|
#undef Grid_ntohll
|
||||||
|
#define Grid_ntohll ntohll
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef Grid_ntohll
|
||||||
|
|
||||||
|
#if BYTE_ORDER == BIG_ENDIAN
|
||||||
|
|
||||||
|
#define Grid_ntohll(A) (A)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if __has_builtin(__builtin_bswap64)
|
||||||
|
#define Grid_ntohll(A) __builtin_bswap64(A)
|
||||||
|
#else
|
||||||
|
#error
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
// A little helper
|
||||||
|
inline void removeWhitespace(std::string &key)
|
||||||
|
{
|
||||||
|
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
class BinaryIO {
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
// Network is big endian
|
||||||
|
static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
|
||||||
|
static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
|
||||||
|
static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
|
||||||
|
static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}
|
||||||
|
|
||||||
|
static inline void be32toh_v(void *file_object,uint32_t bytes)
|
||||||
|
{
|
||||||
|
uint32_t * f = (uint32_t *)file_object;
|
||||||
|
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
||||||
|
f[i] = ntohl(f[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LE must Swap and switch to host
|
||||||
|
static inline void le32toh_v(void *file_object,uint32_t bytes)
|
||||||
|
{
|
||||||
|
uint32_t *fp = (uint32_t *)file_object;
|
||||||
|
uint32_t f;
|
||||||
|
|
||||||
|
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
||||||
|
f = fp[i];
|
||||||
|
// got network order and the network to host
|
||||||
|
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||||
|
fp[i] = ntohl(f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BE is same as network
|
||||||
|
static inline void be64toh_v(void *file_object,uint32_t bytes)
|
||||||
|
{
|
||||||
|
uint64_t * f = (uint64_t *)file_object;
|
||||||
|
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
||||||
|
f[i] = Grid_ntohll(f[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LE must swap and switch;
|
||||||
|
static inline void le64toh_v(void *file_object,uint32_t bytes)
|
||||||
|
{
|
||||||
|
uint64_t *fp = (uint64_t *)file_object;
|
||||||
|
uint64_t f,g;
|
||||||
|
|
||||||
|
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
||||||
|
f = fp[i];
|
||||||
|
// got network order and the network to host
|
||||||
|
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||||
|
g = g << 32;
|
||||||
|
f = f >> 32;
|
||||||
|
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||||
|
fp[i] = Grid_ntohll(g);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class fobj,class munger> static inline void Uint32Checksum(Lattice<vobj> lat,munger munge,uint32_t &csum)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
GridBase *grid = lat._grid ;
|
||||||
|
std::cout <<GridLogMessage<< "Uint32Checksum "<<norm2(lat)<<std::endl;
|
||||||
|
sobj siteObj;
|
||||||
|
fobj fileObj;
|
||||||
|
|
||||||
|
csum = 0;
|
||||||
|
std::vector<int> lcoor;
|
||||||
|
for(int l=0;l<grid->lSites();l++){
|
||||||
|
grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
|
||||||
|
peekLocalSite(siteObj,lat,lcoor);
|
||||||
|
munge(siteObj,fileObj,csum);
|
||||||
|
}
|
||||||
|
grid->GlobalSum(csum);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
|
||||||
|
{
|
||||||
|
for(int i=0;i*sizeof(uint32_t)<buf_size_bytes;i++){
|
||||||
|
csum=csum+buf[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class fobj,class munger>
|
||||||
|
static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
|
GridBase *grid = Umu._grid;
|
||||||
|
|
||||||
|
std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl;
|
||||||
|
|
||||||
|
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||||
|
int ieee32 = (format == std::string("IEEE32"));
|
||||||
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
|
int ieee64 = (format == std::string("IEEE64"));
|
||||||
|
|
||||||
|
// Find the location of each site and send to primary node
|
||||||
|
// Take loop order from Chroma; defines loop order now that NERSC doc no longer
|
||||||
|
// available (how short sighted is that?)
|
||||||
|
std::ifstream fin(file,std::ios::binary|std::ios::in);
|
||||||
|
fin.seekg(offset);
|
||||||
|
|
||||||
|
Umu = zero;
|
||||||
|
uint32_t csum=0;
|
||||||
|
fobj file_object;
|
||||||
|
sobj munged;
|
||||||
|
|
||||||
|
for(int t=0;t<grid->_fdimensions[3];t++){
|
||||||
|
for(int z=0;z<grid->_fdimensions[2];z++){
|
||||||
|
for(int y=0;y<grid->_fdimensions[1];y++){
|
||||||
|
for(int x=0;x<grid->_fdimensions[0];x++){
|
||||||
|
|
||||||
|
std::vector<int> site({x,y,z,t});
|
||||||
|
|
||||||
|
if ( grid->IsBoss() ) {
|
||||||
|
fin.read((char *)&file_object,sizeof(file_object));
|
||||||
|
|
||||||
|
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
||||||
|
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
||||||
|
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
||||||
|
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
||||||
|
|
||||||
|
munge(file_object,munged,csum);
|
||||||
|
}
|
||||||
|
// The boss who read the file has their value poked
|
||||||
|
pokeSite(munged,Umu,site);
|
||||||
|
}}}}
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class fobj,class munger>
|
||||||
|
static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
|
GridBase *grid = Umu._grid;
|
||||||
|
|
||||||
|
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||||
|
int ieee32 = (format == std::string("IEEE32"));
|
||||||
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
|
int ieee64 = (format == std::string("IEEE64"));
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// Serialise through node zero
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
std::cout<< GridLogMessage<< "Serial write I/O "<< file<<std::endl;
|
||||||
|
|
||||||
|
std::ofstream fout;
|
||||||
|
if ( grid->IsBoss() ) {
|
||||||
|
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||||
|
fout.seekp(offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t csum=0;
|
||||||
|
fobj file_object;
|
||||||
|
sobj unmunged;
|
||||||
|
for(int t=0;t<grid->_fdimensions[3];t++){
|
||||||
|
for(int z=0;z<grid->_fdimensions[2];z++){
|
||||||
|
for(int y=0;y<grid->_fdimensions[1];y++){
|
||||||
|
for(int x=0;x<grid->_fdimensions[0];x++){
|
||||||
|
|
||||||
|
std::vector<int> site({x,y,z,t});
|
||||||
|
// peek & write
|
||||||
|
peekSite(unmunged,Umu,site);
|
||||||
|
|
||||||
|
munge(unmunged,file_object,csum);
|
||||||
|
|
||||||
|
|
||||||
|
if ( grid->IsBoss() ) {
|
||||||
|
|
||||||
|
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
||||||
|
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
||||||
|
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
||||||
|
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
||||||
|
|
||||||
|
fout.write((char *)&file_object,sizeof(file_object));
|
||||||
|
}
|
||||||
|
}}}}
|
||||||
|
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class fobj,class munger>
|
||||||
|
static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
|
GridBase *grid = Umu._grid;
|
||||||
|
|
||||||
|
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||||
|
int ieee32 = (format == std::string("IEEE32"));
|
||||||
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
|
int ieee64 = (format == std::string("IEEE64"));
|
||||||
|
|
||||||
|
|
||||||
|
// Take into account block size of parallel file systems want about
|
||||||
|
// 4-16MB chunks.
|
||||||
|
// Ideally one reader/writer per xy plane and read these contiguously
|
||||||
|
// with comms from nominated I/O nodes.
|
||||||
|
std::ifstream fin;
|
||||||
|
|
||||||
|
int nd = grid->_ndimension;
|
||||||
|
std::vector<int> parallel(nd,1);
|
||||||
|
std::vector<int> ioproc (nd);
|
||||||
|
std::vector<int> start(nd);
|
||||||
|
std::vector<int> range(nd);
|
||||||
|
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
assert(grid->CheckerBoarded(d) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t slice_vol = 1;
|
||||||
|
|
||||||
|
int IOnode = 1;
|
||||||
|
for(int d=0;d<grid->_ndimension;d++) {
|
||||||
|
|
||||||
|
if ( d==0 ) parallel[d] = 0;
|
||||||
|
if (parallel[d]) {
|
||||||
|
range[d] = grid->_ldimensions[d];
|
||||||
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
|
} else {
|
||||||
|
range[d] = grid->_gdimensions[d];
|
||||||
|
start[d] = 0;
|
||||||
|
ioproc[d]= 0;
|
||||||
|
|
||||||
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
|
}
|
||||||
|
slice_vol = slice_vol * range[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
uint32_t tmp = IOnode;
|
||||||
|
grid->GlobalSum(tmp);
|
||||||
|
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
|
std::cout<< range[d];
|
||||||
|
if( d< grid->_ndimension-1 )
|
||||||
|
std::cout<< " x ";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int myrank = grid->ThisRank();
|
||||||
|
int iorank = grid->RankFromProcessorCoor(ioproc);
|
||||||
|
|
||||||
|
if ( IOnode ) {
|
||||||
|
fin.open(file,std::ios::binary|std::ios::in);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
// Find the location of each site and send to primary node
|
||||||
|
// Take loop order from Chroma; defines loop order now that NERSC doc no longer
|
||||||
|
// available (how short sighted is that?)
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
Umu = zero;
|
||||||
|
uint32_t csum=0;
|
||||||
|
fobj fileObj;
|
||||||
|
sobj siteObj;
|
||||||
|
|
||||||
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
|
std::vector<int> gsite(nd);
|
||||||
|
std::vector<int> lsite(nd);
|
||||||
|
std::vector<int> iosite(nd);
|
||||||
|
|
||||||
|
grid->CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////
|
||||||
|
// Get the rank of owner of data
|
||||||
|
/////////////////////////
|
||||||
|
int rank, o_idx,i_idx, g_idx;
|
||||||
|
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
|
||||||
|
grid->GlobalCoorToGlobalIndex(gsite,g_idx);
|
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// iorank reads from the seek
|
||||||
|
////////////////////////////////
|
||||||
|
if (myrank == iorank) {
|
||||||
|
|
||||||
|
fin.seekg(offset+g_idx*sizeof(fileObj));
|
||||||
|
fin.read((char *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
|
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
|
munge(fileObj,siteObj,csum);
|
||||||
|
|
||||||
|
if ( rank != myrank ) {
|
||||||
|
grid->SendTo((void *)&siteObj,rank,sizeof(siteObj));
|
||||||
|
} else {
|
||||||
|
pokeLocalSite(siteObj,Umu,lsite);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if ( myrank == rank ) {
|
||||||
|
grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj));
|
||||||
|
pokeLocalSite(siteObj,Umu,lsite);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
grid->Barrier(); // necessary?
|
||||||
|
}
|
||||||
|
|
||||||
|
grid->GlobalSum(csum);
|
||||||
|
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
// Parallel writer
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
template<class vobj,class fobj,class munger>
|
||||||
|
static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
GridBase *grid = Umu._grid;
|
||||||
|
|
||||||
|
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||||
|
int ieee32 = (format == std::string("IEEE32"));
|
||||||
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
|
int ieee64 = (format == std::string("IEEE64"));
|
||||||
|
|
||||||
|
int nd = grid->_ndimension;
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
assert(grid->CheckerBoarded(d) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> parallel(nd,1);
|
||||||
|
std::vector<int> ioproc (nd);
|
||||||
|
std::vector<int> start(nd);
|
||||||
|
std::vector<int> range(nd);
|
||||||
|
|
||||||
|
uint64_t slice_vol = 1;
|
||||||
|
|
||||||
|
int IOnode = 1;
|
||||||
|
|
||||||
|
for(int d=0;d<grid->_ndimension;d++) {
|
||||||
|
|
||||||
|
if ( d==0 ) parallel[d] = 0;
|
||||||
|
|
||||||
|
if (parallel[d]) {
|
||||||
|
range[d] = grid->_ldimensions[d];
|
||||||
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
|
} else {
|
||||||
|
range[d] = grid->_gdimensions[d];
|
||||||
|
start[d] = 0;
|
||||||
|
ioproc[d]= 0;
|
||||||
|
|
||||||
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
slice_vol = slice_vol * range[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
uint32_t tmp = IOnode;
|
||||||
|
grid->GlobalSum(tmp);
|
||||||
|
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
|
std::cout<< range[d];
|
||||||
|
if( d< grid->_ndimension-1 )
|
||||||
|
std::cout<< " x ";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int myrank = grid->ThisRank();
|
||||||
|
int iorank = grid->RankFromProcessorCoor(ioproc);
|
||||||
|
|
||||||
|
// Take into account block size of parallel file systems want about
|
||||||
|
// 4-16MB chunks.
|
||||||
|
// Ideally one reader/writer per xy plane and read these contiguously
|
||||||
|
// with comms from nominated I/O nodes.
|
||||||
|
std::ofstream fout;
|
||||||
|
if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
// Find the location of each site and send to primary node
|
||||||
|
// Take loop order from Chroma; defines loop order now that NERSC doc no longer
|
||||||
|
// available (how short sighted is that?)
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
uint32_t csum=0;
|
||||||
|
fobj fileObj;
|
||||||
|
sobj siteObj;
|
||||||
|
|
||||||
|
|
||||||
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
|
std::vector<int> gsite(nd);
|
||||||
|
std::vector<int> lsite(nd);
|
||||||
|
std::vector<int> iosite(nd);
|
||||||
|
|
||||||
|
grid->CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/////////////////////////
|
||||||
|
// Get the rank of owner of data
|
||||||
|
/////////////////////////
|
||||||
|
int rank, o_idx,i_idx, g_idx;
|
||||||
|
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
|
||||||
|
grid->GlobalCoorToGlobalIndex(gsite,g_idx);
|
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// iorank writes from the seek
|
||||||
|
////////////////////////////////
|
||||||
|
if (myrank == iorank) {
|
||||||
|
|
||||||
|
if ( rank != myrank ) {
|
||||||
|
grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj));
|
||||||
|
} else {
|
||||||
|
peekLocalSite(siteObj,Umu,lsite);
|
||||||
|
}
|
||||||
|
|
||||||
|
munge(siteObj,fileObj,csum);
|
||||||
|
|
||||||
|
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
|
fout.seekp(offset+g_idx*sizeof(fileObj));
|
||||||
|
fout.write((char *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if ( myrank == rank ) {
|
||||||
|
peekLocalSite(siteObj,Umu,lsite);
|
||||||
|
grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
grid->Barrier(); // necessary// or every 16 packets to rate throttle??
|
||||||
|
}
|
||||||
|
|
||||||
|
grid->GlobalSum(csum);
|
||||||
|
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -7,57 +7,23 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
#ifdef HAVE_ENDIAN_H
|
#include <unistd.h>
|
||||||
#include <endian.h>
|
#include <sys/utsname.h>
|
||||||
#endif
|
#include <pwd.h>
|
||||||
|
|
||||||
|
|
||||||
#include <arpa/inet.h>
|
|
||||||
|
|
||||||
// 64bit endian swap is a portability pain
|
|
||||||
#ifndef __has_builtin // Optional of course.
|
|
||||||
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if HAVE_DECL_BE64TOH
|
|
||||||
#undef Grid_ntohll
|
|
||||||
#define Grid_ntohll be64toh
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if HAVE_DECL_NTOHLL
|
|
||||||
#undef Grid_ntohll
|
|
||||||
#define Grid_ntohll ntohll
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef Grid_ntohll
|
|
||||||
|
|
||||||
#if BYTE_ORDER == BIG_ENDIAN
|
|
||||||
|
|
||||||
#define Grid_ntohll(A) (A)
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#if __has_builtin(__builtin_bswap64)
|
|
||||||
#define Grid_ntohll(A) __builtin_bswap64(A)
|
|
||||||
#else
|
|
||||||
#error
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
using namespace QCD;
|
using namespace Grid;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Some data types for intermediate storage
|
// Some data types for intermediate storage
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
|
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
|
||||||
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
|
||||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
||||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
||||||
|
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// header specification/interpretation
|
// header specification/interpretation
|
||||||
@ -86,50 +52,173 @@ class NerscField {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// Bit and Physical Checksumming and QA of data
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Copy the four _fdimensions of `grid` into header.dimension and mark every
// direction of header.boundary as "PERIODIC".  Asserts that the grid is 4D.
inline void NerscGrid(GridBase *grid,NerscField &header)
{
  assert(grid->_ndimension==4);
  for(int dir=0;dir<4;++dir) {
    header.dimension[dir] = grid->_fdimensions[dir];
    header.boundary[dir]  = std::string("PERIODIC");
  }
}
|
||||||
|
template<class GaugeField>
|
||||||
|
inline void NerscStatistics(GaugeField & data,NerscField &header)
|
||||||
|
{
|
||||||
|
header.link_trace=Grid::QCD::WilsonLoops<GaugeField>::linkTrace(data);
|
||||||
|
header.plaquette =Grid::QCD::WilsonLoops<GaugeField>::avgPlaquette(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record who/when/where in the header:
//   creator          <- login name from the password database (left untouched
//                       if the lookup fails)
//   creation_date,
//   archive_date     <- current local time formatted with "%c %Z" (empty string
//                       if the time cannot be converted or formatted)
//   creator_hardware <- "nodename-machine-sysname-release" from uname()
//                       (left untouched if uname() fails)
inline void NerscMachineCharacteristics(NerscField &header)
{
  // Who
  struct passwd *pw = getpwuid (getuid());
  if (pw) header.creator = std::string(pw->pw_name);

  // When
  // strftime is used instead of std::put_time (which was commented out here and
  // left the date fields permanently empty).
  std::time_t t = std::time(nullptr);
  char stamp[128];
  stamp[0] = '\0';
  const std::tm *tm = std::localtime(&t);
  if ( tm != nullptr ) {
    // A zero return means the buffer contents are unspecified: fall back to "".
    if ( std::strftime(stamp,sizeof(stamp),"%c %Z",tm) == 0 ) stamp[0] = '\0';
  }
  header.creation_date = std::string(stamp);
  header.archive_date  = header.creation_date;

  // What
  struct utsname name;
  if ( uname(&name) >= 0 ) {  // only read the struct when uname() filled it in
    header.creator_hardware = std::string(name.nodename)+"-";
    header.creator_hardware+= std::string(name.machine)+"-";
    header.creator_hardware+= std::string(name.sysname)+"-";
    header.creator_hardware+= std::string(name.release);
  }
}
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// Utilities ; these are QCD aware
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
|
||||||
|
{
|
||||||
|
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
|
||||||
|
}
|
||||||
|
// For each of the four directions, overwrite row 2 of the colour matrix with
// adj() of the cross product of rows 0 and 1.  Rows 0 and 1 are only read.
inline void reconstruct3(LorentzColourMatrix & cm)
{
  const int c0=0;
  const int c1=1;
  const int c2=2;
  for(int dir=0;dir<4;dir++){
    const auto cross0 = cm(dir)()(0,c1)*cm(dir)()(1,c2)-cm(dir)()(0,c2)*cm(dir)()(1,c1); // yz-zy
    const auto cross1 = cm(dir)()(0,c2)*cm(dir)()(1,c0)-cm(dir)()(0,c0)*cm(dir)()(1,c2); // zx-xz
    const auto cross2 = cm(dir)()(0,c0)*cm(dir)()(1,c1)-cm(dir)()(0,c1)*cm(dir)()(1,c0); // xy-yx
    cm(dir)()(2,c0) = adj(cross0);
    cm(dir)()(2,c1) = adj(cross1);
    cm(dir)()(2,c2) = adj(cross2);
  }
}
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct NerscSimpleMunger{
|
||||||
|
|
||||||
|
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
for(int i=0;i<3;i++){
|
||||||
|
for(int j=0;j<3;j++){
|
||||||
|
out(mu)()(i,j) = in(mu)()(i,j);
|
||||||
|
}}}
|
||||||
|
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct NerscSimpleUnmunger{
|
||||||
|
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
for(int i=0;i<Nc;i++){
|
||||||
|
for(int j=0;j<Nc;j++){
|
||||||
|
out(mu)()(i,j) = in(mu)()(i,j);
|
||||||
|
}}}
|
||||||
|
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct Nersc3x2munger{
|
||||||
|
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||||
|
|
||||||
|
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
for(int i=0;i<2;i++){
|
||||||
|
for(int j=0;j<3;j++){
|
||||||
|
out(mu)()(i,j) = in(mu)(i)(j);
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
reconstruct3(out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct Nersc3x2unmunger{
|
||||||
|
|
||||||
|
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||||
|
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
for(int i=0;i<2;i++){
|
||||||
|
for(int j=0;j<3;j++){
|
||||||
|
out(mu)(i)(j) = in(mu)()(i,j);
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Write and read from fstream; compute header offset for payload
|
// Write and read from fstream; compute header offset for payload
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
inline unsigned int writeNerscHeader(NerscField &field,std::string file)
|
class NerscIO : public BinaryIO {
|
||||||
{
|
public:
|
||||||
std::ofstream fout(file,std::ios::out);
|
|
||||||
|
static inline unsigned int writeHeader(NerscField &field,std::string file)
|
||||||
|
{
|
||||||
|
std::ofstream fout(file,std::ios::out);
|
||||||
|
|
||||||
fout.seekp(0,std::ios::beg);
|
fout.seekp(0,std::ios::beg);
|
||||||
fout << "BEGIN_HEADER" << std::endl;
|
fout << "BEGIN_HEADER" << std::endl;
|
||||||
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
|
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
|
||||||
fout << "DATATYPE = " << field.data_type << std::endl;
|
fout << "DATATYPE = " << field.data_type << std::endl;
|
||||||
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
|
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
|
||||||
|
|
||||||
for(int i=0;i<4;i++){
|
for(int i=0;i<4;i++){
|
||||||
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
|
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
|
||||||
}
|
}
|
||||||
// just to keep the space and write it later
|
// just to keep the space and write it later
|
||||||
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
|
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
|
||||||
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
|
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
|
||||||
for(int i=0;i<4;i++){
|
for(int i=0;i<4;i++){
|
||||||
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
|
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
|
||||||
}
|
}
|
||||||
fout << "CHECKSUM = "<< std::hex << std::setw(16) << 0 << field.checksum << std::endl;
|
|
||||||
|
|
||||||
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
|
fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::endl;
|
||||||
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
|
fout << std::dec;
|
||||||
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
|
|
||||||
fout << "CREATOR = " << field.creator << std::endl;
|
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
|
||||||
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
|
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
|
||||||
fout << "CREATION_DATE = " << field.creation_date << std::endl;
|
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
|
||||||
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
|
fout << "CREATOR = " << field.creator << std::endl;
|
||||||
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
|
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
|
||||||
fout << "END_HEADER" << std::endl;
|
fout << "CREATION_DATE = " << field.creation_date << std::endl;
|
||||||
field.data_start = fout.tellp();
|
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
|
||||||
return field.data_start;
|
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
|
||||||
|
fout << "END_HEADER" << std::endl;
|
||||||
|
field.data_start = fout.tellp();
|
||||||
|
return field.data_start;
|
||||||
}
|
}
|
||||||
|
|
||||||
// A little helper
|
|
||||||
// Erase, in place, every character of `key` that ::isspace classifies as
// whitespace.  Each char is converted to unsigned char first because passing a
// negative char value to isspace is undefined behaviour.
inline void removeWhitespace(std::string &key)
{
  key.erase(std::remove_if(key.begin(), key.end(),
                           [](char c){ return ::isspace(static_cast<unsigned char>(c)) != 0; }),
            key.end());
}
|
|
||||||
// for the header-reader
|
// for the header-reader
|
||||||
inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
|
||||||
{
|
{
|
||||||
int offset=0;
|
int offset=0;
|
||||||
std::map<std::string,std::string> header;
|
std::map<std::string,std::string> header;
|
||||||
@ -163,7 +252,6 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
|||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// chomp the values
|
// chomp the values
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
|
|
||||||
field.hdr_version = header["HDR_VERSION"];
|
field.hdr_version = header["HDR_VERSION"];
|
||||||
field.data_type = header["DATATYPE"];
|
field.data_type = header["DATATYPE"];
|
||||||
field.storage_format = header["STORAGE_FORMAT"];
|
field.storage_format = header["STORAGE_FORMAT"];
|
||||||
@ -200,314 +288,21 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// Utilities
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// For each of the four directions, overwrite row 2 of the colour matrix with
// adj() of the cross product of rows 0 and 1.  Rows 0 and 1 are only read.
inline void reconstruct3(LorentzColourMatrix & cm)
{
  const int c0=0;
  const int c1=1;
  const int c2=2;
  for(int dir=0;dir<4;dir++){
    const auto cross0 = cm(dir)()(0,c1)*cm(dir)()(1,c2)-cm(dir)()(0,c2)*cm(dir)()(1,c1); // yz-zy
    const auto cross1 = cm(dir)()(0,c2)*cm(dir)()(1,c0)-cm(dir)()(0,c0)*cm(dir)()(1,c2); // zx-xz
    const auto cross2 = cm(dir)()(0,c0)*cm(dir)()(1,c1)-cm(dir)()(0,c1)*cm(dir)()(1,c0); // xy-yx
    cm(dir)()(2,c0) = adj(cross0);
    cm(dir)()(2,c1) = adj(cross1);
    cm(dir)()(2,c2) = adj(cross2);
  }
}
|
|
||||||
|
|
||||||
|
|
||||||
// Convert a buffer of big-endian 32-bit words to host order in place.
// `bytes` is the buffer size in bytes; a trailing partial word is treated as a
// whole word.
void inline be32toh_v(void *file_object,uint32_t bytes)
{
  uint32_t *word = (uint32_t *)file_object;
  uint64_t remaining = ((uint64_t)bytes + sizeof(uint32_t) - 1)/sizeof(uint32_t);
  while ( remaining-- > 0 ) {
    *word = ntohl(*word);
    ++word;
  }
}
|
|
||||||
// Convert a buffer of little-endian 32-bit words to host order in place.
// Each word is byte-reversed (giving network order) and then passed through
// ntohl.  `bytes` is the buffer size in bytes.
void inline le32toh_v(void *file_object,uint32_t bytes)
{
  uint32_t *word = (uint32_t *)file_object;
  uint64_t remaining = ((uint64_t)bytes + sizeof(uint32_t) - 1)/sizeof(uint32_t);
  while ( remaining-- > 0 ) {
    const uint32_t le = *word;
    const uint32_t network = ((le & 0xFFu) << 24)
                           | ((le & 0xFF00u) << 8)
                           | ((le >> 8) & 0xFF00u)
                           | ((le >> 24) & 0xFFu);
    *word = ntohl(network);
    ++word;
  }
}
|
|
||||||
void inline be64toh_v(void *file_object,uint32_t bytes)
|
|
||||||
{
|
|
||||||
uint64_t * f = (uint64_t *)file_object;
|
|
||||||
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
|
||||||
f[i] = Grid_ntohll(f[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Convert a buffer of little-endian 64-bit words to host order in place.
// `bytes` is the buffer size in bytes.
//
// Each word is rebuilt from its bytes (byte k carries bits 8k..8k+7), which is
// correct on any host byte order.  The previous version finished with
// ntohl(g), which truncated the 64-bit word to 32 bits.
void inline le64toh_v(void *file_object,uint32_t bytes)
{
  uint64_t *fp = (uint64_t *)file_object;

  for(uint64_t i=0;i*sizeof(uint64_t)<bytes;i++){
    const unsigned char *b = (const unsigned char *)&fp[i];
    uint64_t host = 0;
    for(int k=7;k>=0;k--){
      host = (host<<8) | (uint64_t)b[k];
    }
    fp[i] = host;  // all eight bytes were read before this store
  }
}
|
|
||||||
|
|
||||||
// Add every 32-bit word of buf to csum (unsigned wrap-around arithmetic).
// buf_size is in bytes; a trailing partial word is read as a whole word.
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size,uint32_t &csum)
{
  const uint32_t *word = buf;
  uint64_t remaining = ((uint64_t)buf_size + sizeof(uint32_t) - 1)/sizeof(uint32_t);
  while ( remaining-- > 0 ) {
    csum = csum + *word;
    ++word;
  }
}
|
|
||||||
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct NerscSimpleMunger{
|
|
||||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<3;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)()(i,j) = in(mu)()(i,j);
|
|
||||||
}}}
|
|
||||||
|
|
||||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
|
||||||
};
|
|
||||||
};
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct NerscSimpleUnmunger{
|
|
||||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<3;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)()(i,j) = in(mu)()(i,j);
|
|
||||||
}}}
|
|
||||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct Nersc3x2munger{
|
|
||||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
|
||||||
|
|
||||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<2;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)()(i,j) = in(mu)(i)(j);
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
reconstruct3(out);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct Nersc3x2unmunger{
|
|
||||||
|
|
||||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
|
||||||
|
|
||||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<2;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)(i)(j) = in(mu)()(i,j);
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Template wizardry to map types to strings for NERSC in an extensible way
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Fallback type -> NERSC string mapping used when no specialisation matches:
// DATATYPE "4D_BINARY_UNKNOWN", FLOATING_POINT "IEEE64BIG".
template<class vobj> struct NerscDataType {
  static void DataType (std::string &str) {
    str = "4D_BINARY_UNKNOWN";
  }
  static void FloatingPoint(std::string &str) {
    str = "IEEE64BIG";
  }
};
|
|
||||||
|
|
||||||
// Double-precision colour matrix: DATATYPE "4D_SU3_GAUGE_3X3",
// FLOATING_POINT "IEEE64BIG".
template<> struct NerscDataType<iColourMatrix<ComplexD> > {
  static void DataType (std::string &str) {
    str = "4D_SU3_GAUGE_3X3";
  }
  static void FloatingPoint(std::string &str) {
    str = "IEEE64BIG";
  }
};
|
|
||||||
|
|
||||||
// Single-precision colour matrix: DATATYPE "4D_SU3_GAUGE_3X3",
// FLOATING_POINT "IEEE32BIG".
template<> struct NerscDataType<iColourMatrix<ComplexF> > {
  static void DataType (std::string &str) {
    str = "4D_SU3_GAUGE_3X3";
  }
  static void FloatingPoint(std::string &str) {
    str = "IEEE32BIG";
  }
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// Bit and Physical Checksumming and QA of data
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
/*
|
|
||||||
template<class vobj> inline uint32_t NerscChecksum(Lattice<vobj> & data)
|
|
||||||
{
|
|
||||||
uint32_t sum;
|
|
||||||
for(int ss=0;ss<data._grid->Osites();ss++){
|
|
||||||
uint32_t *iptr = (uint32_t *)& data._odata[0] ;
|
|
||||||
for(int i=0;i<sizeof(vobj);i+=sizeof(uint32_t)){
|
|
||||||
sum=sum+iptr[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
data._grid->globalSum(sum);
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
// Generic case: fill header.data_type and header.floating_point with the
// strings NerscDataType<vobj> provides.  DataType/FloatingPoint are functions
// taking the destination string by reference, so they must be called rather
// than assigned.
template<class vobj> inline void NerscPhysicalCharacteristics(Lattice<vobj> & data,NerscField &header)
{
  NerscDataType<vobj>::DataType(header.data_type);
  NerscDataType<vobj>::FloatingPoint(header.floating_point);
  return;
}
|
|
||||||
|
|
||||||
// Gauge-field case: take the type strings from NerscDataType keyed on
// decltype(data._odata[0]) and set link_trace and plaquette to 1.0.
// NOTE(review): decltype of an element access is usually a reference type, so
// this may select the generic NerscDataType rather than a specialisation - confirm.
template<> inline void NerscPhysicalCharacteristics(LatticeGaugeField & data,NerscField &header)
{
  typedef NerscDataType<decltype(data._odata[0])> TypeStrings;

  TypeStrings::DataType(header.data_type);
  TypeStrings::FloatingPoint(header.floating_point);

  header.link_trace = 1.0;
  header.plaquette  = 1.0;
}
|
|
||||||
|
|
||||||
// Copy the four _fdimensions of the lattice's grid into header.dimension
// (asserting the grid is 4D), then delegate to NerscPhysicalCharacteristics
// for the type-dependent header fields.  The checksum is not computed here.
template<class vobj> inline void NerscStatisics(Lattice<vobj> & data,NerscField &header)
{
  assert(data._grid->_ndimension==4);

  int dir = 0;
  while ( dir < 4 ) {
    header.dimension[dir] = data._grid->_fdimensions[dir];
    ++dir;
  }

  // compute checksum and any physical properties contained for this type
  //  header.checksum = NerscChecksum(data);

  NerscPhysicalCharacteristics(data,header);
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Now the meat: the object readers
|
// Now the meat: the object readers
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj,class sobj,class fobj,class munger>
|
|
||||||
inline void readNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,std::string &format)
|
template<class vsimd>
|
||||||
|
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
|
||||||
{
|
{
|
||||||
|
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||||
|
|
||||||
GridBase *grid = Umu._grid;
|
GridBase *grid = Umu._grid;
|
||||||
|
int offset = readHeader(file,Umu._grid,header);
|
||||||
|
|
||||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
NerscField clone(header);
|
||||||
int ieee32 = (format == std::string("IEEE32"));
|
|
||||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
|
||||||
int ieee64 = (format == std::string("IEEE64"));
|
|
||||||
|
|
||||||
// Find the location of each site and send to primary node
|
|
||||||
// for(int site=0; site < Layout::vol(); ++site){
|
|
||||||
// multi1d<int> coord = crtesn(site, Layout::lattSize());
|
|
||||||
// for(int dd=0; dd<Nd; dd++){ /* dir */
|
|
||||||
// cfg_in.readArray(su3_buffer, float_size, mat_size);
|
|
||||||
//
|
|
||||||
// Above from Chroma; defines loop order now that NERSC doc no longer
|
|
||||||
// available (how short sighted is that?)
|
|
||||||
{
|
|
||||||
std::ifstream fin(file,std::ios::binary|std::ios::in);
|
|
||||||
fin.seekg(offset);
|
|
||||||
|
|
||||||
Umu = zero;
|
|
||||||
uint32_t csum=0;
|
|
||||||
fobj file_object;
|
|
||||||
sobj munged;
|
|
||||||
|
|
||||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
|
||||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
|
||||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
|
||||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
|
||||||
|
|
||||||
std::vector<int> site({x,y,z,t});
|
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
|
||||||
fin.read((char *)&file_object,sizeof(file_object));
|
|
||||||
|
|
||||||
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
|
|
||||||
munge(file_object,munged,csum);
|
|
||||||
}
|
|
||||||
// The boss who read the file has their value poked
|
|
||||||
pokeSite(munged,Umu,site);
|
|
||||||
}}}}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class vobj,class sobj,class fobj,class munger>
|
|
||||||
inline void writeNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
|
|
||||||
int sequence,double lt,double pl)
|
|
||||||
{
|
|
||||||
GridBase *grid = Umu._grid;
|
|
||||||
NerscField header;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
// First write the header; this is in wrong place
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
assert(grid->_ndimension == 4);
|
|
||||||
for(int d=0;d<4;d++){
|
|
||||||
header.dimension[d]=grid->_fdimensions[d];
|
|
||||||
header.boundary [d]=std::string("PERIODIC");;
|
|
||||||
}
|
|
||||||
header.hdr_version=std::string("WHATDAHECK");
|
|
||||||
// header.storage_format=storage_format<vobj>::string; // use template specialisation
|
|
||||||
// header.data_type=data_type<vobj>::string;
|
|
||||||
header.storage_format=std::string("debug");
|
|
||||||
header.data_type =std::string("debug");
|
|
||||||
|
|
||||||
//FIXME; use template specialisation to fill these out
|
|
||||||
header.link_trace =lt;
|
|
||||||
header.plaquette =pl;
|
|
||||||
header.checksum =0;
|
|
||||||
|
|
||||||
//
|
|
||||||
header.sequence_number =sequence;
|
|
||||||
header.ensemble_id =std::string("UKQCD");
|
|
||||||
header.ensemble_label =std::string("UKQCD");
|
|
||||||
header.creator =std::string("Tadahito");
|
|
||||||
header.creator_hardware=std::string("BlueGene/Q");
|
|
||||||
header.creation_date =std::string("AnnoDomini");
|
|
||||||
header.archive_date =std::string("AnnoDomini");
|
|
||||||
header.floating_point =std::string("IEEE64BIG");
|
|
||||||
// header.data_start=;
|
|
||||||
// unsigned int checksum;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
// Now write the body
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
{
|
|
||||||
std::ofstream fout(file,std::ios::binary|std::ios::out);
|
|
||||||
fout.seekp(offset);
|
|
||||||
|
|
||||||
Umu = zero;
|
|
||||||
uint32_t csum=0;
|
|
||||||
fobj file_object;
|
|
||||||
sobj unmunged;
|
|
||||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
|
||||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
|
||||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
|
||||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
|
||||||
std::vector<int> site({x,y,z,t});
|
|
||||||
peekSite(unmunged,Umu,site);
|
|
||||||
munge(unmunged,file_object,csum);
|
|
||||||
// broadcast & insert
|
|
||||||
fout.write((char *)&file_object,sizeof(file_object));
|
|
||||||
}}}}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std::string file)
|
|
||||||
{
|
|
||||||
GridBase *grid = Umu._grid;
|
|
||||||
|
|
||||||
int offset = readNerscHeader(file,Umu._grid,header);
|
|
||||||
|
|
||||||
std::string format(header.floating_point);
|
std::string format(header.floating_point);
|
||||||
|
|
||||||
@ -516,48 +311,106 @@ inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std
|
|||||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
int ieee64 = (format == std::string("IEEE64"));
|
int ieee64 = (format == std::string("IEEE64"));
|
||||||
|
|
||||||
|
uint32_t csum;
|
||||||
// depending on datatype, set up munger;
|
// depending on datatype, set up munger;
|
||||||
// munger is a function of <floating point, Real, data_type>
|
// munger is a function of <floating point, Real, data_type>
|
||||||
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
||||||
if ( ieee32 || ieee32big ) {
|
if ( ieee32 || ieee32big ) {
|
||||||
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3F>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||||
(Umu,file,
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||||
Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(),
|
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
|
||||||
offset,format);
|
|
||||||
}
|
}
|
||||||
if ( ieee64 || ieee64big ) {
|
if ( ieee64 || ieee64big ) {
|
||||||
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3D>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||||
(Umu,file,
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||||
Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),
|
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
|
||||||
offset,format);
|
|
||||||
}
|
}
|
||||||
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
|
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
|
||||||
if ( ieee32 || ieee32big ) {
|
if ( ieee32 || ieee32big ) {
|
||||||
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixF>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||||
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||||
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
|
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
|
||||||
}
|
}
|
||||||
if ( ieee64 || ieee64big ) {
|
if ( ieee64 || ieee64big ) {
|
||||||
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixD>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||||
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||||
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
|
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NerscStatistics<GaugeField>(Umu,clone);
|
||||||
|
|
||||||
|
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||||
|
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
|
||||||
|
assert(csum == header.checksum );
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"Read NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vsimd>
|
||||||
inline void writeNerscConfiguration(Lattice<vobj> &Umu,NerscField &header,std::string file)
|
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
|
||||||
{
|
{
|
||||||
GridBase &grid = Umu._grid;
|
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||||
|
|
||||||
|
typedef iLorentzColourMatrix<vsimd> vobj;
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
|
// Following should become arguments
|
||||||
|
NerscField header;
|
||||||
|
header.sequence_number = 1;
|
||||||
|
header.ensemble_id = "UKQCD";
|
||||||
|
header.ensemble_label = "DWF";
|
||||||
|
|
||||||
|
typedef LorentzColourMatrixD fobj3D;
|
||||||
|
typedef LorentzColour2x3D fobj2D;
|
||||||
|
typedef LorentzColourMatrixF fobj3f;
|
||||||
|
typedef LorentzColour2x3F fobj2f;
|
||||||
|
|
||||||
|
GridBase *grid = Umu._grid;
|
||||||
|
|
||||||
|
NerscGrid(grid,header);
|
||||||
|
NerscStatistics<GaugeField>(Umu,header);
|
||||||
|
NerscMachineCharacteristics(header);
|
||||||
|
|
||||||
|
uint32_t csum;
|
||||||
|
int offset;
|
||||||
|
|
||||||
NerscStatisics(Umu,header);
|
if ( two_row ) {
|
||||||
|
|
||||||
int offset = writeNerscHeader(header,file);
|
header.floating_point = std::string("IEEE64BIG");
|
||||||
|
header.data_type = std::string("4D_SU3_GAUGE");
|
||||||
|
Nersc3x2unmunger<fobj2D,sobj> munge;
|
||||||
|
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
|
||||||
|
offset = writeHeader(header,file);
|
||||||
|
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
|
||||||
|
|
||||||
writeNerscObject(Umu,NerscSimpleMunger<vobj,vobj>(),offset);
|
std::string file1 = file+"para";
|
||||||
}
|
int offset1 = writeHeader(header,file1);
|
||||||
|
int csum1=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file1,munge,offset,header.floating_point);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl;
|
||||||
|
std::cout << GridLogMessage <<std::hex<< " TESTING PARALLEL WRITE csums " << csum1 << " "<< csum << std::endl;
|
||||||
|
std::cout << std::dec;
|
||||||
|
|
||||||
}
|
assert(offset1==offset);
|
||||||
|
assert(csum1==csum);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
header.floating_point = std::string("IEEE64BIG");
|
||||||
|
header.data_type = std::string("4D_SU3_GAUGE_3X3");
|
||||||
|
NerscSimpleUnmunger<fobj3D,sobj> munge;
|
||||||
|
BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
|
||||||
|
offset = writeHeader(header,file);
|
||||||
|
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}}
|
||||||
#endif
|
#endif
|
||||||
|
44
lib/pugixml/README.md
Normal file
44
lib/pugixml/README.md
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
pugixml [Travis CI build status](https://travis-ci.org/zeux/pugixml) [AppVeyor build status](https://ci.appveyor.com/project/zeux/pugixml)
|
||||||
|
=======
|
||||||
|
|
||||||
|
pugixml is a C++ XML processing library, which consists of a DOM-like interface with rich traversal/modification
|
||||||
|
capabilities, an extremely fast XML parser which constructs the DOM tree from an XML file/buffer, and an XPath 1.0
|
||||||
|
implementation for complex data-driven tree queries. Full Unicode support is also available, with Unicode interface
|
||||||
|
variants and conversions between different Unicode encodings (which happen automatically during parsing/saving).
|
||||||
|
|
||||||
|
pugixml is used by a lot of projects, both open-source and proprietary, for performance and easy-to-use interface.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
Documentation for the current release of pugixml is available on-line as two separate documents:
|
||||||
|
|
||||||
|
* [Quick-start guide](http://pugixml.org/docs/quickstart.html), that aims to provide enough information to start using the library;
|
||||||
|
* [Complete reference manual](http://pugixml.org/docs/manual.html), that describes all features of the library in detail.
|
||||||
|
|
||||||
|
You’re advised to start with the quick-start guide; however, many important library features are either not described in it at all or only mentioned briefly; if you require more information you should read the complete manual.
|
||||||
|
|
||||||
|
## License
|
||||||
|
This library is available to anybody free of charge, under the terms of MIT License:
|
||||||
|
|
||||||
|
Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person
|
||||||
|
obtaining a copy of this software and associated documentation
|
||||||
|
files (the "Software"), to deal in the Software without
|
||||||
|
restriction, including without limitation the rights to use,
|
||||||
|
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the
|
||||||
|
Software is furnished to do so, subject to the following
|
||||||
|
conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
OTHER DEALINGS IN THE SOFTWARE.
|
71
lib/pugixml/pugiconfig.hpp
Normal file
71
lib/pugixml/pugiconfig.hpp
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/**
|
||||||
|
* pugixml parser - version 1.6
|
||||||
|
* --------------------------------------------------------
|
||||||
|
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||||
|
* Report bugs and download new versions at http://pugixml.org/
|
||||||
|
*
|
||||||
|
* This library is distributed under the MIT License. See notice at the end
|
||||||
|
* of this file.
|
||||||
|
*
|
||||||
|
* This work is based on the pugxml parser, which is:
|
||||||
|
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HEADER_PUGICONFIG_HPP
|
||||||
|
#define HEADER_PUGICONFIG_HPP
|
||||||
|
|
||||||
|
// Uncomment this to enable wchar_t mode
|
||||||
|
// #define PUGIXML_WCHAR_MODE
|
||||||
|
|
||||||
|
// Uncomment this to disable XPath
|
||||||
|
// #define PUGIXML_NO_XPATH
|
||||||
|
|
||||||
|
// Uncomment this to disable STL
|
||||||
|
// #define PUGIXML_NO_STL
|
||||||
|
|
||||||
|
// Uncomment this to disable exceptions
|
||||||
|
// #define PUGIXML_NO_EXCEPTIONS
|
||||||
|
|
||||||
|
// Set this to control attributes for public classes/functions, i.e.:
|
||||||
|
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
|
||||||
|
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
|
||||||
|
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
|
||||||
|
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
|
||||||
|
|
||||||
|
// Tune these constants to adjust memory-related behavior
|
||||||
|
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
|
||||||
|
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
|
||||||
|
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
|
||||||
|
|
||||||
|
// Uncomment this to switch to header-only version
|
||||||
|
// #define PUGIXML_HEADER_ONLY
|
||||||
|
|
||||||
|
// Uncomment this to enable long long support
|
||||||
|
// #define PUGIXML_HAS_LONG_LONG
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
12485
lib/pugixml/pugixml.cc
Normal file
12485
lib/pugixml/pugixml.cc
Normal file
File diff suppressed because it is too large
Load Diff
1400
lib/pugixml/pugixml.h
Normal file
1400
lib/pugixml/pugixml.h
Normal file
File diff suppressed because it is too large
Load Diff
52
lib/pugixml/readme.txt
Normal file
52
lib/pugixml/readme.txt
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
pugixml 1.6 - an XML processing library
|
||||||
|
|
||||||
|
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||||
|
Report bugs and download new versions at http://pugixml.org/
|
||||||
|
|
||||||
|
This is the distribution of pugixml, which is a C++ XML processing library,
|
||||||
|
which consists of a DOM-like interface with rich traversal/modification
|
||||||
|
capabilities, an extremely fast XML parser which constructs the DOM tree from
|
||||||
|
an XML file/buffer, and an XPath 1.0 implementation for complex data-driven
|
||||||
|
tree queries. Full Unicode support is also available, with Unicode interface
|
||||||
|
variants and conversions between different Unicode encodings (which happen
|
||||||
|
automatically during parsing/saving).
|
||||||
|
|
||||||
|
The distribution contains the following folders:
|
||||||
|
|
||||||
|
contrib/ - various contributions to pugixml
|
||||||
|
|
||||||
|
docs/ - documentation
|
||||||
|
docs/samples - pugixml usage examples
|
||||||
|
docs/quickstart.html - quick start guide
|
||||||
|
docs/manual.html - complete manual
|
||||||
|
|
||||||
|
scripts/ - project files for IDE/build systems
|
||||||
|
|
||||||
|
src/ - header and source files
|
||||||
|
|
||||||
|
readme.txt - this file.
|
||||||
|
|
||||||
|
This library is distributed under the MIT License:
|
||||||
|
|
||||||
|
Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person
|
||||||
|
obtaining a copy of this software and associated documentation
|
||||||
|
files (the "Software"), to deal in the Software without
|
||||||
|
restriction, including without limitation the rights to use,
|
||||||
|
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the
|
||||||
|
Software is furnished to do so, subject to the following
|
||||||
|
conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
OTHER DEALINGS IN THE SOFTWARE.
|
@ -19,15 +19,25 @@ namespace QCD {
|
|||||||
static const int Nd=4;
|
static const int Nd=4;
|
||||||
static const int Nhs=2; // half spinor
|
static const int Nhs=2; // half spinor
|
||||||
static const int Nds=8; // double stored gauge field
|
static const int Nds=8; // double stored gauge field
|
||||||
|
static const int Ngp=2; // gparity index range
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// QCD iMatrix types
|
// QCD iMatrix types
|
||||||
// Index conventions: Lorentz x Spin x Colour
|
// Index conventions: Lorentz x Spin x Colour
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
static const int ColourIndex = 1;
|
static const int ColourIndex = 2;
|
||||||
static const int SpinIndex = 2;
|
static const int SpinIndex = 1;
|
||||||
static const int LorentzIndex= 3;
|
static const int LorentzIndex= 0;
|
||||||
|
|
||||||
|
// Useful trait: is this a spin index?
|
||||||
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
|
|
||||||
|
const int SpinorIndex = 2;
|
||||||
|
template<typename T> struct isSpinor {
|
||||||
|
static const bool value = (SpinorIndex==T::TensorLevel);
|
||||||
|
};
|
||||||
|
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
||||||
|
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
||||||
|
|
||||||
// ChrisK very keen to add extra space for Gparity doubling.
|
// ChrisK very keen to add extra space for Gparity doubling.
|
||||||
//
|
//
|
||||||
@ -49,6 +59,9 @@ namespace QCD {
|
|||||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||||
|
|
||||||
|
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||||
|
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||||
|
|
||||||
// Spin matrix
|
// Spin matrix
|
||||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||||
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
||||||
@ -140,7 +153,7 @@ namespace QCD {
|
|||||||
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
||||||
|
|
||||||
// singlets
|
// singlets
|
||||||
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
||||||
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
||||||
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
||||||
|
|
||||||
@ -148,7 +161,7 @@ namespace QCD {
|
|||||||
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
||||||
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
||||||
|
|
||||||
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
||||||
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
||||||
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
||||||
|
|
||||||
@ -237,6 +250,8 @@ namespace QCD {
|
|||||||
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
||||||
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
||||||
|
|
||||||
|
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
|
||||||
|
|
||||||
// Uhgg... typing this hurt ;)
|
// Uhgg... typing this hurt ;)
|
||||||
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
||||||
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
||||||
@ -252,47 +267,47 @@ namespace QCD {
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
//spin
|
//spin
|
||||||
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(peekIndex<SpinIndex>(rhs,0))
|
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
|
||||||
{
|
{
|
||||||
return peekIndex<SpinIndex>(rhs,i);
|
return PeekIndex<SpinIndex>(rhs,i);
|
||||||
}
|
}
|
||||||
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(peekIndex<SpinIndex>(rhs,0,0))
|
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
|
||||||
{
|
{
|
||||||
return peekIndex<SpinIndex>(rhs,i,j);
|
return PeekIndex<SpinIndex>(rhs,i,j);
|
||||||
}
|
}
|
||||||
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(peekIndex<SpinIndex>(rhs,0))
|
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
|
||||||
{
|
{
|
||||||
return peekIndex<SpinIndex>(rhs,i);
|
return PeekIndex<SpinIndex>(rhs,i);
|
||||||
}
|
}
|
||||||
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(peekIndex<SpinIndex>(rhs,0,0))
|
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
|
||||||
{
|
{
|
||||||
return peekIndex<SpinIndex>(rhs,i,j);
|
return PeekIndex<SpinIndex>(rhs,i,j);
|
||||||
}
|
}
|
||||||
//colour
|
//colour
|
||||||
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(peekIndex<ColourIndex>(rhs,0))
|
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
|
||||||
{
|
{
|
||||||
return peekIndex<ColourIndex>(rhs,i);
|
return PeekIndex<ColourIndex>(rhs,i);
|
||||||
}
|
}
|
||||||
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(peekIndex<ColourIndex>(rhs,0,0))
|
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
|
||||||
{
|
{
|
||||||
return peekIndex<ColourIndex>(rhs,i,j);
|
return PeekIndex<ColourIndex>(rhs,i,j);
|
||||||
}
|
}
|
||||||
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(peekIndex<ColourIndex>(rhs,0))
|
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
|
||||||
{
|
{
|
||||||
return peekIndex<ColourIndex>(rhs,i);
|
return PeekIndex<ColourIndex>(rhs,i);
|
||||||
}
|
}
|
||||||
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(peekIndex<ColourIndex>(rhs,0,0))
|
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
|
||||||
{
|
{
|
||||||
return peekIndex<ColourIndex>(rhs,i,j);
|
return PeekIndex<ColourIndex>(rhs,i,j);
|
||||||
}
|
}
|
||||||
//lorentz
|
//lorentz
|
||||||
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(peekIndex<LorentzIndex>(rhs,0))
|
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
|
||||||
{
|
{
|
||||||
return peekIndex<LorentzIndex>(rhs,i);
|
return PeekIndex<LorentzIndex>(rhs,i);
|
||||||
}
|
}
|
||||||
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(peekIndex<LorentzIndex>(rhs,0))
|
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
|
||||||
{
|
{
|
||||||
return peekIndex<LorentzIndex>(rhs,i);
|
return PeekIndex<LorentzIndex>(rhs,i);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
@ -303,35 +318,35 @@ namespace QCD {
|
|||||||
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
|
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
|
||||||
int i)
|
int i)
|
||||||
{
|
{
|
||||||
pokeIndex<ColourIndex>(lhs,rhs,i);
|
PokeIndex<ColourIndex>(lhs,rhs,i);
|
||||||
}
|
}
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void pokeColour(Lattice<vobj> &lhs,
|
void pokeColour(Lattice<vobj> &lhs,
|
||||||
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
|
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
|
||||||
int i,int j)
|
int i,int j)
|
||||||
{
|
{
|
||||||
pokeIndex<ColourIndex>(lhs,rhs,i,j);
|
PokeIndex<ColourIndex>(lhs,rhs,i,j);
|
||||||
}
|
}
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void pokeSpin(Lattice<vobj> &lhs,
|
void pokeSpin(Lattice<vobj> &lhs,
|
||||||
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
|
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
|
||||||
int i)
|
int i)
|
||||||
{
|
{
|
||||||
pokeIndex<SpinIndex>(lhs,rhs,i);
|
PokeIndex<SpinIndex>(lhs,rhs,i);
|
||||||
}
|
}
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void pokeSpin(Lattice<vobj> &lhs,
|
void pokeSpin(Lattice<vobj> &lhs,
|
||||||
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
|
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
|
||||||
int i,int j)
|
int i,int j)
|
||||||
{
|
{
|
||||||
pokeIndex<SpinIndex>(lhs,rhs,i,j);
|
PokeIndex<SpinIndex>(lhs,rhs,i,j);
|
||||||
}
|
}
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void pokeLorentz(Lattice<vobj> &lhs,
|
void pokeLorentz(Lattice<vobj> &lhs,
|
||||||
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
|
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
|
||||||
int i)
|
int i)
|
||||||
{
|
{
|
||||||
pokeIndex<LorentzIndex>(lhs,rhs,i);
|
PokeIndex<LorentzIndex>(lhs,rhs,i);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
@ -411,6 +426,11 @@ namespace QCD {
|
|||||||
#include <qcd/utils/LinalgUtils.h>
|
#include <qcd/utils/LinalgUtils.h>
|
||||||
#include <qcd/utils/CovariantCshift.h>
|
#include <qcd/utils/CovariantCshift.h>
|
||||||
#include <qcd/utils/WilsonLoops.h>
|
#include <qcd/utils/WilsonLoops.h>
|
||||||
|
#include <qcd/utils/SUn.h>
|
||||||
#include <qcd/action/Actions.h>
|
#include <qcd/action/Actions.h>
|
||||||
|
#include <qcd/hmc/integrators/Integrator.h>
|
||||||
|
#include <qcd/hmc/integrators/Integrator_algorithm.h>
|
||||||
|
#include <qcd/hmc/HMC.h>
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
58
lib/qcd/action/ActionBase.h
Normal file
58
lib/qcd/action/ActionBase.h
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#ifndef QCD_ACTION_BASE
|
||||||
|
#define QCD_ACTION_BASE
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD{
|
||||||
|
|
||||||
|
template<class GaugeField>
|
||||||
|
class Action {
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Boundary conditions? // Heatbath?
|
||||||
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions
|
||||||
|
virtual RealD S (const GaugeField &U) = 0; // evaluate the action
|
||||||
|
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
|
||||||
|
virtual ~Action() {};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
|
||||||
|
/*
|
||||||
|
template<class GaugeField, class FermionField>
|
||||||
|
class PseudoFermionAction : public Action<GaugeField> {
|
||||||
|
public:
|
||||||
|
FermionField Phi;
|
||||||
|
GridParallelRNG &pRNG;
|
||||||
|
GridBase &Grid;
|
||||||
|
|
||||||
|
PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) {
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual void refresh(const GaugeField &gauge) {
|
||||||
|
gaussian(Phi,pRNG);
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
template<class GaugeField> struct ActionLevel{
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
|
||||||
|
|
||||||
|
int multiplier;
|
||||||
|
|
||||||
|
std::vector<ActPtr> actions;
|
||||||
|
|
||||||
|
ActionLevel(int mul = 1) : multiplier(mul) {
|
||||||
|
assert (mul > 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
void push_back(ActPtr ptr){
|
||||||
|
actions.push_back(ptr);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// ActionSet<GaugeField> is a std::vector of ActionLevel objects for that gauge field type.
template<class GaugeField>
using ActionSet = std::vector< ActionLevel<GaugeField> >;
|
||||||
|
|
||||||
|
|
||||||
|
}}
|
||||||
|
#endif
|
29
lib/qcd/action/ActionParams.h
Normal file
29
lib/qcd/action/ActionParams.h
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#ifndef GRID_QCD_ACTION_PARAMS_H
|
||||||
|
#define GRID_QCD_ACTION_PARAMS_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
// These can move into a params header and be given MacroMagic serialisation
|
||||||
|
struct GparityWilsonImplParams {
|
||||||
|
std::vector<int> twists;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct WilsonImplParams { };
|
||||||
|
|
||||||
|
struct OneFlavourRationalParams {
|
||||||
|
RealD lo;
|
||||||
|
RealD hi;
|
||||||
|
int MaxIter; // Vector?
|
||||||
|
RealD tolerance; // Vector?
|
||||||
|
int degree=10;
|
||||||
|
int precision=64;
|
||||||
|
|
||||||
|
OneFlavourRationalParams (RealD _lo,RealD _hi,int _maxit,RealD tol=1.0e-8,int _degree = 10,int _precision=64) :
|
||||||
|
lo(_lo), hi(_hi), MaxIter(_maxit), tolerance(tol), degree(_degree), precision(_precision)
|
||||||
|
{};
|
||||||
|
};
|
||||||
|
|
||||||
|
}}
|
||||||
|
|
||||||
|
#endif
|
@ -6,68 +6,173 @@
|
|||||||
// are separating the concept of the operator from that of action.
|
// are separating the concept of the operator from that of action.
|
||||||
//
|
//
|
||||||
// The FermAction contains methods to create
|
// The FermAction contains methods to create
|
||||||
//
|
|
||||||
// * Linear operators (Hermitian and non-hermitian) .. my LinearOperator
|
// * Linear operators (Hermitian and non-hermitian) .. my LinearOperator
|
||||||
// * System solvers (Hermitian and non-hermitian) .. my OperatorFunction
|
// * System solvers (Hermitian and non-hermitian) .. my OperatorFunction
|
||||||
// * MultiShift System solvers (Hermitian and non-hermitian) .. my OperatorFunction
|
// * MultiShift System solvers (Hermitian and non-hermitian) .. my OperatorFunction
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Abstract base interface
|
// Abstract base interface
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/fermion/FermionOperator.h>
|
#include <qcd/action/ActionBase.h>
|
||||||
|
#include <qcd/action/ActionParams.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Utility functions
|
// Utility functions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
|
#include <qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
|
||||||
|
#include <qcd/action/fermion/FermionOperatorImpl.h>
|
||||||
|
#include <qcd/action/fermion/FermionOperator.h>
|
||||||
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// 4D formulations
|
// Gauge Actions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/fermion/WilsonFermion.h>
|
#include <qcd/action/gauge/WilsonGaugeAction.h>
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
typedef WilsonGaugeAction<LatticeGaugeField> WilsonGaugeActionR;
|
||||||
|
typedef WilsonGaugeAction<LatticeGaugeFieldF> WilsonGaugeActionF;
|
||||||
|
typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
|
||||||
|
}}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Explicit template instantiation is still required in the .cc files
|
||||||
|
//
|
||||||
|
// - CayleyFermion5D.cc
|
||||||
|
// - PartialFractionFermion5D.cc
|
||||||
|
// - WilsonFermion5D.cc
|
||||||
|
// - WilsonKernelsHand.cc
|
||||||
|
// - ContinuedFractionFermion5D.cc
|
||||||
|
// - WilsonFermion.cc
|
||||||
|
// - WilsonKernels.cc
|
||||||
|
//
|
||||||
|
// The explicit instantiation is only avoidable if we move this source to headers and end up with include/parse/recompile
|
||||||
|
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Expands to one explicit instantiation of the class template A per supported
// Impl (currently WilsonImplF and WilsonImplD). Invoke it once, at namespace
// scope, in the .cc file that holds A's member definitions.
// The expansion already ends in ';', so a ';' written after the invocation is
// just an empty declaration.
#define FermOpTemplateInstantiate(A) \
  template class A<WilsonImplF>;		\
  template class A<WilsonImplD>;

// The G-parity instantiations below are disabled. To enable them, move the two
// lines into the macro body and put a continuation backslash after the
// WilsonImplD line. Do not end a '//' comment line with a backslash: it splices
// the next source line into the comment (GCC reports this as -Wcomment).
//  template class A<GparityWilsonImplF>;
//  template class A<GparityWilsonImplD>;
|
||||||
|
|
||||||
|
////////////////////////////////////////////
|
||||||
|
// Fermion operators / actions
|
||||||
|
////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include <qcd/action/fermion/WilsonFermion.h> // 4d wilson like
|
||||||
|
#include <qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
||||||
|
|
||||||
//#include <qcd/action/fermion/CloverFermion.h>
|
//#include <qcd/action/fermion/CloverFermion.h>
|
||||||
|
|
||||||
////////////////////////////////////////////
|
#include <qcd/action/fermion/CayleyFermion5D.h> // Cayley types
|
||||||
// 5D formulations...
|
|
||||||
////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <qcd/action/fermion/WilsonFermion5D.h> // used by all 5d overlap types
|
|
||||||
|
|
||||||
//////////
|
|
||||||
// Cayley
|
|
||||||
//////////
|
|
||||||
#include <qcd/action/fermion/CayleyFermion5D.h>
|
|
||||||
|
|
||||||
#include <qcd/action/fermion/DomainWallFermion.h>
|
#include <qcd/action/fermion/DomainWallFermion.h>
|
||||||
#include <qcd/action/fermion/DomainWallFermion.h>
|
#include <qcd/action/fermion/DomainWallFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/MobiusFermion.h>
|
#include <qcd/action/fermion/MobiusFermion.h>
|
||||||
#include <qcd/action/fermion/ScaledShamirFermion.h>
|
#include <qcd/action/fermion/ScaledShamirFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
|
|
||||||
|
|
||||||
#include <qcd/action/fermion/MobiusZolotarevFermion.h>
|
#include <qcd/action/fermion/MobiusZolotarevFermion.h>
|
||||||
#include <qcd/action/fermion/ShamirZolotarevFermion.h>
|
#include <qcd/action/fermion/ShamirZolotarevFermion.h>
|
||||||
|
#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
||||||
|
|
||||||
//////////////////////
|
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
||||||
// Continued fraction
|
|
||||||
//////////////////////
|
|
||||||
#include <qcd/action/fermion/ContinuedFractionFermion5D.h>
|
|
||||||
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
||||||
|
|
||||||
//////////////////////
|
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
||||||
// Partial fraction
|
|
||||||
//////////////////////
|
|
||||||
#include <qcd/action/fermion/PartialFractionFermion5D.h>
|
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// It is easier to maintain the following typedef list centrally, as more "impl" targets
|
||||||
|
// are added, (e.g. extension for gparity, half precision project in comms etc..)
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
// Cayley 5d
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonImplR> WilsonFermionR;
|
||||||
|
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||||
|
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
||||||
|
|
||||||
|
typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
|
||||||
|
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
|
||||||
|
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
||||||
|
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
||||||
|
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
||||||
|
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
||||||
|
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
||||||
|
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
||||||
|
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
|
||||||
|
|
||||||
|
typedef MobiusZolotarevFermion<WilsonImplR> MobiusZolotarevFermionR;
|
||||||
|
typedef MobiusZolotarevFermion<WilsonImplF> MobiusZolotarevFermionF;
|
||||||
|
typedef MobiusZolotarevFermion<WilsonImplD> MobiusZolotarevFermionD;
|
||||||
|
typedef ShamirZolotarevFermion<WilsonImplR> ShamirZolotarevFermionR;
|
||||||
|
typedef ShamirZolotarevFermion<WilsonImplF> ShamirZolotarevFermionF;
|
||||||
|
typedef ShamirZolotarevFermion<WilsonImplD> ShamirZolotarevFermionD;
|
||||||
|
|
||||||
|
typedef OverlapWilsonCayleyTanhFermion<WilsonImplR> OverlapWilsonCayleyTanhFermionR;
|
||||||
|
typedef OverlapWilsonCayleyTanhFermion<WilsonImplF> OverlapWilsonCayleyTanhFermionF;
|
||||||
|
typedef OverlapWilsonCayleyTanhFermion<WilsonImplD> OverlapWilsonCayleyTanhFermionD;
|
||||||
|
typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplR> OverlapWilsonCayleyZolotarevFermionR;
|
||||||
|
typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplF> OverlapWilsonCayleyZolotarevFermionF;
|
||||||
|
typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplD> OverlapWilsonCayleyZolotarevFermionD;
|
||||||
|
|
||||||
|
// Continued fraction
|
||||||
|
typedef OverlapWilsonContFracTanhFermion<WilsonImplR> OverlapWilsonContFracTanhFermionR;
|
||||||
|
typedef OverlapWilsonContFracTanhFermion<WilsonImplF> OverlapWilsonContFracTanhFermionF;
|
||||||
|
typedef OverlapWilsonContFracTanhFermion<WilsonImplD> OverlapWilsonContFracTanhFermionD;
|
||||||
|
typedef OverlapWilsonContFracZolotarevFermion<WilsonImplR> OverlapWilsonContFracZolotarevFermionR;
|
||||||
|
typedef OverlapWilsonContFracZolotarevFermion<WilsonImplF> OverlapWilsonContFracZolotarevFermionF;
|
||||||
|
typedef OverlapWilsonContFracZolotarevFermion<WilsonImplD> OverlapWilsonContFracZolotarevFermionD;
|
||||||
|
|
||||||
|
// Partial fraction
|
||||||
|
typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplR> OverlapWilsonPartialFractionTanhFermionR;
|
||||||
|
typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplF> OverlapWilsonPartialFractionTanhFermionF;
|
||||||
|
typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplD> OverlapWilsonPartialFractionTanhFermionD;
|
||||||
|
|
||||||
|
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplR> OverlapWilsonPartialFractionZolotarevFermionR;
|
||||||
|
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplF> OverlapWilsonPartialFractionZolotarevFermionF;
|
||||||
|
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplD> OverlapWilsonPartialFractionZolotarevFermionD;
|
||||||
|
|
||||||
|
// Gparity cases; partial list until tested
|
||||||
|
typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
|
||||||
|
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
|
||||||
|
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
|
||||||
|
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
|
||||||
|
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
|
||||||
|
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
|
||||||
|
|
||||||
|
}}
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
#include <qcd/action/fermion/g5HermitianLinop.h>
|
#include <qcd/action/fermion/g5HermitianLinop.h>
|
||||||
|
|
||||||
|
////////////////////////////////////////
|
||||||
|
// Pseudo fermion combinations for HMC
|
||||||
|
////////////////////////////////////////
|
||||||
|
#include <qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
|
||||||
|
|
||||||
|
#include <qcd/action/pseudofermion/TwoFlavour.h>
|
||||||
|
#include <qcd/action/pseudofermion/TwoFlavourRatio.h>
|
||||||
|
#include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
|
||||||
|
#include <qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
|
||||||
|
|
||||||
|
//IroIro inserted general "Nf" param; could also be done,
|
||||||
|
//but not clear why unless into large Nf BSM studies
|
||||||
|
//Even there, don't want the explicit (2) on power denominator
|
||||||
|
//if even number of flavours, so further generalised interface
|
||||||
|
//would be required but easy.
|
||||||
|
#include <qcd/action/pseudofermion/OneFlavourRational.h>
|
||||||
|
#include <qcd/action/pseudofermion/OneFlavourRationalRatio.h>
|
||||||
|
#include <qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
|
||||||
|
#include <qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,27 +2,27 @@
|
|||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
CayleyFermion5D::CayleyFermion5D(LatticeGaugeField &_Umu,
|
template<class Impl>
|
||||||
GridCartesian &FiveDimGrid,
|
CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridCartesian &FourDimGrid,
|
||||||
RealD _mass,RealD _M5) :
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
WilsonFermion5D(_Umu,
|
RealD _mass,RealD _M5,const ImplParams &p) :
|
||||||
|
WilsonFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_M5),
|
FourDimRedBlackGrid,_M5,p),
|
||||||
mass(_mass)
|
mass(_mass)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
// override multiply
|
template<class Impl>
|
||||||
RealD CayleyFermion5D::M (const LatticeFermion &psi, LatticeFermion &chi)
|
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
|
||||||
{
|
{
|
||||||
LatticeFermion Din(psi._grid);
|
|
||||||
|
|
||||||
// Assemble Din
|
// Assemble Din
|
||||||
|
int Ls=this->Ls;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
// Din = bs psi[s] + cs[s] psi[s+1}
|
// Din = bs psi[s] + cs[s] psi[s+1}
|
||||||
@ -37,11 +37,57 @@ namespace QCD {
|
|||||||
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
|
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
|
||||||
|
axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
|
||||||
|
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
|
||||||
|
} else {
|
||||||
|
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
|
||||||
|
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
DW(Din,chi,DaggerNo);
|
// override multiply
|
||||||
|
template<class Impl>
|
||||||
|
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
FermionField Din(psi._grid);
|
||||||
|
|
||||||
|
// Assemble Din
|
||||||
|
/*
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
// Din = bs psi[s] + cs[s] psi[s+1}
|
||||||
|
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
||||||
|
// Din+= -mass*cs[s] psi[s+1}
|
||||||
|
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
|
||||||
|
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
|
||||||
|
} else {
|
||||||
|
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
Meooe5D(psi,Din);
|
||||||
|
|
||||||
|
this->DW(Din,chi,DaggerNo);
|
||||||
// ((b D_W + D_w hop terms +1) on s-diag
|
// ((b D_W + D_w hop terms +1) on s-diag
|
||||||
axpby(chi,1.0,1.0,chi,psi);
|
axpby(chi,1.0,1.0,chi,psi);
|
||||||
|
|
||||||
|
// Call Mooee??
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==0 ){
|
if ( s==0 ){
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
|
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
|
||||||
@ -57,20 +103,26 @@ namespace QCD {
|
|||||||
return norm2(chi);
|
return norm2(chi);
|
||||||
}
|
}
|
||||||
|
|
||||||
RealD CayleyFermion5D::Mdag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
// Under adjoint
|
// Under adjoint
|
||||||
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
||||||
//D2- P+ D2+ P-D1-^dag D2+dag
|
//D2- P+ D2+ P-D1-^dag D2+dag
|
||||||
|
|
||||||
LatticeFermion Din(psi._grid);
|
FermionField Din(psi._grid);
|
||||||
// Apply Dw
|
// Apply Dw
|
||||||
DW(psi,Din,DaggerYes);
|
this->DW(psi,Din,DaggerYes);
|
||||||
|
|
||||||
|
Meooe5D(Din,chi);
|
||||||
|
|
||||||
|
int Ls=this->Ls;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
|
|
||||||
// Collect the terms in DW
|
// Collect the terms in DW
|
||||||
// Chi = bs Din[s] + cs[s] Din[s+1}
|
// Chi = bs Din[s] + cs[s] Din[s+1}
|
||||||
// Chi+= -mass*cs[s] psi[s+1}
|
// Chi+= -mass*cs[s] psi[s+1}
|
||||||
|
/*
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
|
axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
|
||||||
@ -81,6 +133,10 @@ namespace QCD {
|
|||||||
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
||||||
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
|
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// FIXME just call MooeeDag??
|
||||||
|
|
||||||
// Collect the terms indept of DW
|
// Collect the terms indept of DW
|
||||||
if ( s==0 ){
|
if ( s==0 ){
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
|
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
|
||||||
@ -99,10 +155,17 @@ namespace QCD {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
void CayleyFermion5D::Meooe (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
LatticeFermion tmp(psi._grid);
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
FermionField tmp(psi._grid);
|
||||||
// Assemble the 5d matrix
|
// Assemble the 5d matrix
|
||||||
|
Meooe5D(psi,tmp);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
// tmp = bs psi[s] + cs[s] psi[s+1}
|
// tmp = bs psi[s] + cs[s] psi[s+1}
|
||||||
@ -117,24 +180,33 @@ namespace QCD {
|
|||||||
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Apply 4d dslash
|
// Apply 4d dslash
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
DhopEO(tmp,chi,DaggerNo);
|
this->DhopEO(tmp,chi,DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(tmp,chi,DaggerNo);
|
this->DhopOE(tmp,chi,DaggerNo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CayleyFermion5D::MeooeDag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
LatticeFermion tmp(psi._grid);
|
FermionField tmp(psi._grid);
|
||||||
// Apply 4d dslash
|
// Apply 4d dslash
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
DhopEO(psi,tmp,DaggerYes);
|
this->DhopEO(psi,tmp,DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(psi,tmp,DaggerYes);
|
this->DhopOE(psi,tmp,DaggerYes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Meooe5D(tmp,chi);
|
||||||
|
#if 0
|
||||||
|
std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
|
||||||
// Assemble the 5d matrix
|
// Assemble the 5d matrix
|
||||||
|
int Ls=this->Ls;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
axpby_ssp_pplus(chi,beo[s],tmp, -ceo[s+1] ,tmp,s,s+1);
|
axpby_ssp_pplus(chi,beo[s],tmp, -ceo[s+1] ,tmp,s,s+1);
|
||||||
@ -147,10 +219,15 @@ namespace QCD {
|
|||||||
axpby_ssp_pminus(chi,1.0 ,chi,-ceo[s-1],tmp,s,s-1);
|
axpby_ssp_pminus(chi,1.0 ,chi,-ceo[s-1],tmp,s,s-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CayleyFermion5D::Mooee (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
for (int s=0;s<Ls;s++){
|
for (int s=0;s<Ls;s++){
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
|
axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
|
||||||
@ -165,8 +242,10 @@ namespace QCD {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CayleyFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
|
template<class Impl>
|
||||||
LatticeFermion tmp(psi._grid);
|
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
|
int Ls=this->Ls;
|
||||||
|
FermionField tmp(psi._grid);
|
||||||
// Assemble the 5d matrix
|
// Assemble the 5d matrix
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
@ -183,11 +262,13 @@ namespace QCD {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Apply 4d dslash fragment
|
// Apply 4d dslash fragment
|
||||||
DhopDir(tmp,chi,dir,disp);
|
this->DhopDir(tmp,chi,dir,disp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CayleyFermion5D::MooeeDag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
for (int s=0;s<Ls;s++){
|
for (int s=0;s<Ls;s++){
|
||||||
// Assemble the 5d matrix
|
// Assemble the 5d matrix
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
@ -203,8 +284,10 @@ namespace QCD {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CayleyFermion5D::MooeeInv (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
// Apply (L^{\prime})^{-1}
|
// Apply (L^{\prime})^{-1}
|
||||||
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
||||||
for (int s=1;s<Ls;s++){
|
for (int s=1;s<Ls;s++){
|
||||||
@ -227,8 +310,10 @@ namespace QCD {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CayleyFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
// Apply (U^{\prime})^{-dagger}
|
// Apply (U^{\prime})^{-dagger}
|
||||||
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
||||||
for (int s=1;s<Ls;s++){
|
for (int s=1;s<Ls;s++){
|
||||||
@ -249,16 +334,66 @@ namespace QCD {
|
|||||||
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
|
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
FermionField Din(V._grid);
|
||||||
|
|
||||||
|
if ( dag == DaggerNo ) {
|
||||||
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
|
Meooe5D(V,Din);
|
||||||
|
this->DhopDeriv(mat,U,Din,dag);
|
||||||
|
} else {
|
||||||
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
|
Meooe5D(U,Din);
|
||||||
|
this->DhopDeriv(mat,Din,V,dag);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
FermionField Din(V._grid);
|
||||||
|
|
||||||
|
if ( dag == DaggerNo ) {
|
||||||
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
|
Meooe5D(V,Din);
|
||||||
|
this->DhopDerivOE(mat,U,Din,dag);
|
||||||
|
} else {
|
||||||
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
|
Meooe5D(U,Din);
|
||||||
|
this->DhopDerivOE(mat,Din,V,dag);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
FermionField Din(V._grid);
|
||||||
|
|
||||||
|
if ( dag == DaggerNo ) {
|
||||||
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
|
Meooe5D(V,Din);
|
||||||
|
this->DhopDerivEO(mat,U,Din,dag);
|
||||||
|
} else {
|
||||||
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
|
Meooe5D(U,Din);
|
||||||
|
this->DhopDerivEO(mat,Din,V,dag);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Tanh
|
// Tanh
|
||||||
void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
{
|
{
|
||||||
SetCoefficientsZolotarev(1.0,zdata,b,c);
|
SetCoefficientsZolotarev(1.0,zdata,b,c);
|
||||||
|
|
||||||
}
|
}
|
||||||
//Zolo
|
//Zolo
|
||||||
void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
{
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// The Cayley coeffs (unprec)
|
// The Cayley coeffs (unprec)
|
||||||
@ -308,8 +443,8 @@ namespace QCD {
|
|||||||
ceo.resize(Ls);
|
ceo.resize(Ls);
|
||||||
|
|
||||||
for(int i=0;i<Ls;i++){
|
for(int i=0;i<Ls;i++){
|
||||||
bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0);
|
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
|
||||||
cee[i]=as[i]*(1.0-cs[i]*(4.0-M5));
|
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
|
||||||
beo[i]=as[i]*bs[i];
|
beo[i]=as[i]*bs[i];
|
||||||
ceo[i]=-as[i]*cs[i];
|
ceo[i]=-as[i]*cs[i];
|
||||||
}
|
}
|
||||||
@ -362,6 +497,8 @@ namespace QCD {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FermOpTemplateInstantiate(CayleyFermion5D);
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,25 +5,36 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class CayleyFermion5D : public WilsonFermion5D
|
template<class Impl>
|
||||||
|
class CayleyFermion5D : public WilsonFermion5D<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const LatticeFermion &in, LatticeFermion &out);
|
virtual RealD M (const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out);
|
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
virtual void Meooe (const LatticeFermion &in, LatticeFermion &out);
|
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||||
virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MeooeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void Mooee (const LatticeFermion &in, LatticeFermion &out);
|
virtual void Mooee (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||||
virtual void Instantiatable(void)=0;
|
virtual void Instantiatable(void)=0;
|
||||||
|
|
||||||
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
|
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
|
||||||
// Efficient support for multigrid coarsening
|
// Efficient support for multigrid coarsening
|
||||||
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
|
||||||
|
void Meooe5D (const FermionField &in, FermionField &out);
|
||||||
|
void MeooeDag5D (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// protected:
|
// protected:
|
||||||
RealD mass;
|
RealD mass;
|
||||||
@ -48,12 +59,12 @@ namespace Grid {
|
|||||||
std::vector<RealD> dee;
|
std::vector<RealD> dee;
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
CayleyFermion5D(LatticeGaugeField &_Umu,
|
CayleyFermion5D(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5);
|
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
|
@ -3,20 +3,22 @@
|
|||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
|
||||||
{
|
{
|
||||||
SetCoefficientsZolotarev(1.0/scale,zdata);
|
SetCoefficientsZolotarev(1.0/scale,zdata);
|
||||||
}
|
}
|
||||||
void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
|
||||||
{
|
{
|
||||||
// How to check Ls matches??
|
// How to check Ls matches??
|
||||||
// std::cout << Ls << " Ls"<<std::endl;
|
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
|
||||||
// std::cout << zdata->n << " - n"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
|
||||||
// std::cout << zdata->da << " -da "<<std::endl;
|
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
|
||||||
// std::cout << zdata->db << " -db"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
|
||||||
// std::cout << zdata->dn << " -dn"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
|
||||||
// std::cout << zdata->dd << " -dd"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
|
||||||
|
int Ls = this->Ls;
|
||||||
assert(zdata->db==Ls);// Beta has Ls coeffs
|
assert(zdata->db==Ls);// Beta has Ls coeffs
|
||||||
|
|
||||||
R=(1+this->mass)/(1-this->mass);
|
R=(1+this->mass)/(1-this->mass);
|
||||||
@ -39,7 +41,7 @@ namespace Grid {
|
|||||||
|
|
||||||
|
|
||||||
ZoloHiInv =1.0/zolo_hi;
|
ZoloHiInv =1.0/zolo_hi;
|
||||||
dw_diag = (4.0-M5)*ZoloHiInv;
|
dw_diag = (4.0-this->M5)*ZoloHiInv;
|
||||||
|
|
||||||
See.resize(Ls);
|
See.resize(Ls);
|
||||||
Aee.resize(Ls);
|
Aee.resize(Ls);
|
||||||
@ -55,17 +57,20 @@ namespace Grid {
|
|||||||
See[s] = Aee[s] - 1.0/See[s-1];
|
See[s] = Aee[s] - 1.0/See[s-1];
|
||||||
}
|
}
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
std::cout <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
|
std::cout<<GridLogMessage <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
RealD ContinuedFractionFermion5D::M (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
RealD ContinuedFractionFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
LatticeFermion D(psi._grid);
|
int Ls = this->Ls;
|
||||||
|
|
||||||
DW(psi,D,DaggerNo);
|
FermionField D(psi._grid);
|
||||||
|
|
||||||
|
this->DW(psi,D,DaggerNo);
|
||||||
|
|
||||||
int sign=1;
|
int sign=1;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
@ -83,15 +88,20 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
return norm2(chi);
|
return norm2(chi);
|
||||||
}
|
}
|
||||||
RealD ContinuedFractionFermion5D::Mdag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
RealD ContinuedFractionFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
|
// This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
|
||||||
// The rest of matrix is symmetric.
|
// The rest of matrix is symmetric.
|
||||||
// Can ignore "dag"
|
// Can ignore "dag"
|
||||||
return M(psi,chi);
|
return M(psi,chi);
|
||||||
}
|
}
|
||||||
void ContinuedFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
|
template<class Impl>
|
||||||
DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
|
void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
|
||||||
|
|
||||||
int sign=1;
|
int sign=1;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==(Ls-1) ){
|
if ( s==(Ls-1) ){
|
||||||
@ -102,13 +112,16 @@ namespace Grid {
|
|||||||
sign=-sign;
|
sign=-sign;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void ContinuedFractionFermion5D::Meooe (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
// Apply 4d dslash
|
// Apply 4d dslash
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
||||||
} else {
|
} else {
|
||||||
DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
|
||||||
}
|
}
|
||||||
|
|
||||||
int sign=1;
|
int sign=1;
|
||||||
@ -121,12 +134,16 @@ namespace Grid {
|
|||||||
sign=-sign;
|
sign=-sign;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void ContinuedFractionFermion5D::MeooeDag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
Meooe(psi,chi);
|
this->Meooe(psi,chi);
|
||||||
}
|
}
|
||||||
void ContinuedFractionFermion5D::Mooee (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
int sign=1;
|
int sign=1;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
if ( s==0 ) {
|
if ( s==0 ) {
|
||||||
@ -144,12 +161,16 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ContinuedFractionFermion5D::MooeeDag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
Mooee(psi,chi);
|
this->Mooee(psi,chi);
|
||||||
}
|
}
|
||||||
void ContinuedFractionFermion5D::MooeeInv (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
// Apply Linv
|
// Apply Linv
|
||||||
axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
|
axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
|
||||||
for(int s=1;s<Ls;s++){
|
for(int s=1;s<Ls;s++){
|
||||||
@ -165,27 +186,88 @@ namespace Grid {
|
|||||||
axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
|
axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void ContinuedFractionFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
MooeeInv(psi,chi);
|
this->MooeeInv(psi,chi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
FermionField D(V._grid);
|
||||||
|
|
||||||
|
int sign=1;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==(Ls-1) ){
|
||||||
|
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
|
||||||
|
} else {
|
||||||
|
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
|
||||||
|
}
|
||||||
|
sign=-sign;
|
||||||
|
}
|
||||||
|
this->DhopDeriv(mat,D,V,DaggerNo);
|
||||||
|
};
|
||||||
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
FermionField D(V._grid);
|
||||||
|
|
||||||
|
int sign=1;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==(Ls-1) ){
|
||||||
|
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
|
||||||
|
} else {
|
||||||
|
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
|
||||||
|
}
|
||||||
|
sign=-sign;
|
||||||
|
}
|
||||||
|
this->DhopDerivOE(mat,D,V,DaggerNo);
|
||||||
|
};
|
||||||
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
FermionField D(V._grid);
|
||||||
|
|
||||||
|
int sign=1;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==(Ls-1) ){
|
||||||
|
ag5xpby_ssp(D,Beta[s]*ZoloHiInv,U,0.0,U,s,s);
|
||||||
|
} else {
|
||||||
|
ag5xpby_ssp(D,cc[s]*Beta[s]*sign*ZoloHiInv,U,0.0,U,s,s);
|
||||||
|
}
|
||||||
|
sign=-sign;
|
||||||
|
}
|
||||||
|
this->DhopDerivEO(mat,D,V,DaggerNo);
|
||||||
|
};
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
ContinuedFractionFermion5D::ContinuedFractionFermion5D(
|
template<class Impl>
|
||||||
LatticeGaugeField &_Umu,
|
ContinuedFractionFermion5D<Impl>::ContinuedFractionFermion5D(
|
||||||
|
GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD M5) :
|
RealD _mass,RealD M5,const ImplParams &p) :
|
||||||
WilsonFermion5D(_Umu,
|
WilsonFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid, FiveDimRedBlackGrid,
|
FiveDimGrid, FiveDimRedBlackGrid,
|
||||||
FourDimGrid, FourDimRedBlackGrid,M5),
|
FourDimGrid, FourDimRedBlackGrid,M5,p),
|
||||||
mass(_mass)
|
mass(_mass)
|
||||||
{
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
assert((Ls&0x1)==1); // Odd Ls required
|
assert((Ls&0x1)==1); // Odd Ls required
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FermOpTemplateInstantiate(ContinuedFractionFermion5D);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,35 +5,43 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class ContinuedFractionFermion5D : public WilsonFermion5D
|
template<class Impl>
|
||||||
|
class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const LatticeFermion &in, LatticeFermion &out);
|
virtual RealD M (const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out);
|
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
virtual void Meooe (const LatticeFermion &in, LatticeFermion &out);
|
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||||
virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MeooeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void Mooee (const LatticeFermion &in, LatticeFermion &out);
|
virtual void Mooee (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
|
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
|
||||||
// virtual void Instantiatable(void)=0;
|
// virtual void Instantiatable(void)=0;
|
||||||
virtual void Instantiatable(void) =0;
|
virtual void Instantiatable(void) =0;
|
||||||
|
|
||||||
// Efficient support for multigrid coarsening
|
// Efficient support for multigrid coarsening
|
||||||
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
ContinuedFractionFermion5D(LatticeGaugeField &_Umu,
|
ContinuedFractionFermion5D(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD M5);
|
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
@ -7,24 +7,27 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class DomainWallFermion : public CayleyFermion5D
|
template<class Impl>
|
||||||
|
class DomainWallFermion : public CayleyFermion5D<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void Instantiatable(void) {};
|
virtual void Instantiatable(void) {};
|
||||||
// Constructors
|
// Constructors
|
||||||
DomainWallFermion(LatticeGaugeField &_Umu,
|
DomainWallFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5) :
|
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
CayleyFermion5D(_Umu,
|
CayleyFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5)
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
|
|
||||||
{
|
{
|
||||||
RealD eps = 1.0;
|
RealD eps = 1.0;
|
||||||
@ -32,9 +35,9 @@ namespace Grid {
|
|||||||
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
||||||
assert(zdata->n==this->Ls);
|
assert(zdata->n==this->Ls);
|
||||||
|
|
||||||
std::cout << "DomainWallFermion with Ls="<<Ls<<std::endl;
|
std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
|
||||||
// Call base setter
|
// Call base setter
|
||||||
this->CayleyFermion5D::SetCoefficientsTanh(zdata,1.0,0.0);
|
this->SetCoefficientsTanh(zdata,1.0,0.0);
|
||||||
|
|
||||||
Approx::zolotarev_free(zdata);
|
Approx::zolotarev_free(zdata);
|
||||||
}
|
}
|
||||||
|
@ -5,16 +5,20 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
// Four component fermions
|
// Allow to select between gauge representation rank bc's, flavours etc.
|
||||||
// Should type template the vector and gauge types
|
// and single/double precision.
|
||||||
// Think about multiple representations
|
////////////////////////////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
template<class FermionField,class GaugeField>
|
template<class Impl>
|
||||||
class FermionOperator : public CheckerBoardedSparseMatrixBase<FermionField>
|
class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>, public Impl
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
|
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
|
||||||
|
|
||||||
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
|
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
|
||||||
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
|
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
|
||||||
|
|
||||||
@ -28,6 +32,8 @@ namespace Grid {
|
|||||||
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
|
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
|
virtual int ConstEE(void) { return 1; }; // clover returns zero as EE depends on gauge field
|
||||||
|
|
||||||
virtual void Meooe (const FermionField &in, FermionField &out)=0;
|
virtual void Meooe (const FermionField &in, FermionField &out)=0;
|
||||||
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
|
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
|
||||||
virtual void Mooee (const FermionField &in, FermionField &out)=0;
|
virtual void Mooee (const FermionField &in, FermionField &out)=0;
|
||||||
@ -39,13 +45,31 @@ namespace Grid {
|
|||||||
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
|
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
|
||||||
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
|
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
|
||||||
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
|
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
|
||||||
|
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
|
||||||
|
|
||||||
virtual void Mdiag(const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
|
||||||
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
|
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
|
||||||
|
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
|
||||||
|
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;}; // Clover can override these
|
||||||
|
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
|
||||||
|
|
||||||
|
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||||
|
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||||
|
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||||
|
|
||||||
|
|
||||||
|
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||||
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
|
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
// Updates gauge field during HMC
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
virtual void ImportGauge(const GaugeField & _U)=0;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
359
lib/qcd/action/fermion/FermionOperatorImpl.h
Normal file
359
lib/qcd/action/fermion/FermionOperatorImpl.h
Normal file
@ -0,0 +1,359 @@
|
|||||||
|
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
|
||||||
|
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
// Template parameter class constructs to package
|
||||||
|
// externally control Fermion implementations
|
||||||
|
// in orthogonal directions
|
||||||
|
//
|
||||||
|
// Ultimately need Impl to always define types where XXX is opaque
|
||||||
|
//
|
||||||
|
// typedef typename XXX Simd;
|
||||||
|
// typedef typename XXX GaugeLinkField;
|
||||||
|
// typedef typename XXX GaugeField;
|
||||||
|
// typedef typename XXX GaugeActField;
|
||||||
|
// typedef typename XXX FermionField;
|
||||||
|
// typedef typename XXX DoubledGaugeField;
|
||||||
|
// typedef typename XXX SiteSpinor;
|
||||||
|
// typedef typename XXX SiteHalfSpinor;
|
||||||
|
// typedef typename XXX Compressor;
|
||||||
|
//
|
||||||
|
// and Methods:
|
||||||
|
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
|
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
|
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
||||||
|
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||||
|
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// To acquire the typedefs from "Base" (either a base class or template param) use:
|
||||||
|
//
|
||||||
|
// INHERIT_GIMPL_TYPES(Base)
|
||||||
|
// INHERIT_FIMPL_TYPES(Base)
|
||||||
|
// INHERIT_IMPL_TYPES(Base)
|
||||||
|
//
|
||||||
|
// The Fermion operators will do the following:
|
||||||
|
//
|
||||||
|
// struct MyOpParams {
|
||||||
|
// RealD mass;
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// template<class Impl>
|
||||||
|
// class MyOp : public Impl {
|
||||||
|
// public:
|
||||||
|
//
|
||||||
|
// INHERIT_ALL_IMPL_TYPES(Impl);
|
||||||
|
//
|
||||||
|
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
|
||||||
|
// {
|
||||||
|
//
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Implementation dependent gauge types
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#define INHERIT_IMPL_TYPES(Base) \
|
||||||
|
INHERIT_GIMPL_TYPES(Base)\
|
||||||
|
INHERIT_FIMPL_TYPES(Base)
|
||||||
|
|
||||||
|
#define INHERIT_GIMPL_TYPES(GImpl) \
|
||||||
|
typedef typename GImpl::Simd Simd;\
|
||||||
|
typedef typename GImpl::GaugeLinkField GaugeLinkField;\
|
||||||
|
typedef typename GImpl::GaugeField GaugeField;
|
||||||
|
|
||||||
|
// Composition with smeared link, bc's etc.. probably need multiple inheritance
|
||||||
|
// Variable precision "S" and variable Nc
|
||||||
|
template<class S,int Nrepresentation=Nc>
|
||||||
|
class ImplGauge {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef S Simd;
|
||||||
|
|
||||||
|
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||||
|
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
|
||||||
|
|
||||||
|
typedef iImplGaugeLink <Simd> SiteGaugeLink;
|
||||||
|
typedef iImplGaugeField <Simd> SiteGaugeField;
|
||||||
|
|
||||||
|
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
|
||||||
|
typedef Lattice<SiteGaugeField> GaugeField;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Implementation dependent fermion types
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#define INHERIT_FIMPL_TYPES(Impl)\
|
||||||
|
typedef typename Impl::FermionField FermionField; \
|
||||||
|
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
||||||
|
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||||
|
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||||
|
typedef typename Impl::Compressor Compressor; \
|
||||||
|
typedef typename Impl::StencilImpl StencilImpl; \
|
||||||
|
typedef typename Impl::ImplParams ImplParams;
|
||||||
|
|
||||||
|
///////
|
||||||
|
// Single flavour four spinors with colour index
|
||||||
|
///////
|
||||||
|
template<class S,int Nrepresentation=Nc>
|
||||||
|
class WilsonImpl : public ImplGauge<S,Nrepresentation> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef ImplGauge<S,Nrepresentation> Gimpl;
|
||||||
|
|
||||||
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
|
||||||
|
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
||||||
|
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
||||||
|
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
|
||||||
|
|
||||||
|
typedef iImplSpinor <Simd> SiteSpinor;
|
||||||
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
|
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||||
|
|
||||||
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
|
|
||||||
|
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
||||||
|
typedef WilsonImplParams ImplParams;
|
||||||
|
typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl;
|
||||||
|
ImplParams Params;
|
||||||
|
WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
||||||
|
|
||||||
|
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
|
||||||
|
mult(&phi(),&U(mu),&chi());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
|
{
|
||||||
|
conformable(Uds._grid,GaugeGrid);
|
||||||
|
conformable(Umu._grid,GaugeGrid);
|
||||||
|
GaugeLinkField U(GaugeGrid);
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
PokeIndex<LorentzIndex>(Uds,U,mu);
|
||||||
|
U = adj(Cshift(U,mu,-1));
|
||||||
|
PokeIndex<LorentzIndex>(Uds,U,mu+4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||||
|
GaugeLinkField link(mat._grid);
|
||||||
|
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
||||||
|
PokeIndex<LorentzIndex>(mat,link,mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
||||||
|
|
||||||
|
int Ls=Btilde._grid->_fdimensions[0];
|
||||||
|
|
||||||
|
GaugeLinkField tmp(mat._grid);
|
||||||
|
tmp = zero;
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
||||||
|
int sU=sss;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
int sF = s+Ls*sU;
|
||||||
|
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<class S,int Nrepresentation>
|
||||||
|
class GparityWilsonImpl : public ImplGauge<S,Nrepresentation> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef ImplGauge<S,Nrepresentation> Gimpl;
|
||||||
|
|
||||||
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
|
||||||
|
template<typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >;
|
||||||
|
template<typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >;
|
||||||
|
template<typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >;
|
||||||
|
|
||||||
|
typedef iImplSpinor <Simd> SiteSpinor;
|
||||||
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
|
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||||
|
|
||||||
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
|
|
||||||
|
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
||||||
|
typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl;
|
||||||
|
|
||||||
|
typedef GparityWilsonImplParams ImplParams;
|
||||||
|
ImplParams Params;
|
||||||
|
GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
||||||
|
|
||||||
|
|
||||||
|
// provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity
|
||||||
|
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
|
||||||
|
|
||||||
|
typedef SiteHalfSpinor vobj;
|
||||||
|
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||||
|
|
||||||
|
vobj vtmp;
|
||||||
|
sobj stmp;
|
||||||
|
|
||||||
|
GridBase *grid = St._grid;
|
||||||
|
|
||||||
|
const int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
|
int direction = St._directions[mu];
|
||||||
|
int distance = St._distances[mu];
|
||||||
|
int ptype = St._permute_type[mu];
|
||||||
|
int sl = St._grid->_simd_layout[direction];
|
||||||
|
|
||||||
|
// Fixme X.Y.Z.T hardcode in stencil
|
||||||
|
int mmu = mu % Nd;
|
||||||
|
|
||||||
|
// assert our assumptions
|
||||||
|
assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code
|
||||||
|
assert((sl==1)||(sl==2));
|
||||||
|
|
||||||
|
std::vector<int> icoor;
|
||||||
|
|
||||||
|
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
||||||
|
|
||||||
|
if ( sl == 2 ) {
|
||||||
|
|
||||||
|
std::vector<sobj> vals(Nsimd);
|
||||||
|
|
||||||
|
extract(chi,vals);
|
||||||
|
for(int s=0;s<Nsimd;s++){
|
||||||
|
|
||||||
|
grid->iCoorFromIindex(icoor,s);
|
||||||
|
|
||||||
|
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||||
|
|
||||||
|
int permute_lane;
|
||||||
|
if ( distance == 1) {
|
||||||
|
permute_lane = icoor[direction]?1:0;
|
||||||
|
} else {
|
||||||
|
permute_lane = icoor[direction]?0:1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( permute_lane ) {
|
||||||
|
stmp(0) = vals[s](1);
|
||||||
|
stmp(1) = vals[s](0);
|
||||||
|
vals[s] = stmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
merge(vtmp,vals);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
vtmp(0) = chi(1);
|
||||||
|
vtmp(1) = chi(0);
|
||||||
|
}
|
||||||
|
mult(&phi(0),&U(0)(mu),&vtmp(0));
|
||||||
|
mult(&phi(1),&U(1)(mu),&vtmp(1));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
mult(&phi(0),&U(0)(mu),&chi(0));
|
||||||
|
mult(&phi(1),&U(1)(mu),&chi(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
|
{
|
||||||
|
|
||||||
|
conformable(Uds._grid,GaugeGrid);
|
||||||
|
conformable(Umu._grid,GaugeGrid);
|
||||||
|
|
||||||
|
GaugeLinkField Utmp(GaugeGrid);
|
||||||
|
GaugeLinkField U(GaugeGrid);
|
||||||
|
GaugeLinkField Uconj(GaugeGrid);
|
||||||
|
|
||||||
|
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||||
|
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
|
LatticeCoordinate(coor,mu);
|
||||||
|
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
Uconj = conjugate(U);
|
||||||
|
|
||||||
|
// This phase could come from a simple bc 1,1,-1,1 ..
|
||||||
|
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
||||||
|
if ( Params.twists[mu] ) {
|
||||||
|
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
|
Uds[ss](0)(mu) = U[ss]();
|
||||||
|
Uds[ss](1)(mu) = Uconj[ss]();
|
||||||
|
}
|
||||||
|
|
||||||
|
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||||
|
Uconj = adj(Cshift(Uconj,mu,-1));
|
||||||
|
|
||||||
|
Utmp = U;
|
||||||
|
if ( Params.twists[mu] ) {
|
||||||
|
Utmp = where(coor==0,Uconj,Utmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
|
Uds[ss](0)(mu+4) = Utmp[ss]();
|
||||||
|
}
|
||||||
|
|
||||||
|
Utmp = Uconj;
|
||||||
|
if ( Params.twists[mu] ) {
|
||||||
|
Utmp = where(coor==0,U,Utmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
|
Uds[ss](1)(mu+4) = Utmp[ss]();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||||
|
assert(0);
|
||||||
|
// Fixme
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||||
|
assert(0);
|
||||||
|
// Fixme
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef WilsonImpl<vComplex ,Nc> WilsonImplR; // Real.. whichever prec
|
||||||
|
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
|
||||||
|
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
|
||||||
|
|
||||||
|
typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec
|
||||||
|
typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
|
||||||
|
typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
@ -7,35 +7,38 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class MobiusFermion : public CayleyFermion5D
|
template<class Impl>
|
||||||
|
class MobiusFermion : public CayleyFermion5D<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void Instantiatable(void) {};
|
virtual void Instantiatable(void) {};
|
||||||
// Constructors
|
// Constructors
|
||||||
MobiusFermion(LatticeGaugeField &_Umu,
|
MobiusFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD b, RealD c) :
|
RealD b, RealD c,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
CayleyFermion5D(_Umu,
|
CayleyFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5)
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
|
|
||||||
{
|
{
|
||||||
RealD eps = 1.0;
|
RealD eps = 1.0;
|
||||||
|
|
||||||
std::cout << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Tanh approx"<<std::endl;
|
std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
|
||||||
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
||||||
assert(zdata->n==this->Ls);
|
assert(zdata->n==this->Ls);
|
||||||
|
|
||||||
// Call base setter
|
// Call base setter
|
||||||
this->CayleyFermion5D::SetCoefficientsTanh(zdata,b,c);
|
this->SetCoefficientsTanh(zdata,b,c);
|
||||||
|
|
||||||
Approx::zolotarev_free(zdata);
|
Approx::zolotarev_free(zdata);
|
||||||
|
|
||||||
|
@ -7,26 +7,29 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class MobiusZolotarevFermion : public CayleyFermion5D
|
template<class Impl>
|
||||||
|
class MobiusZolotarevFermion : public CayleyFermion5D<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void Instantiatable(void) {};
|
virtual void Instantiatable(void) {};
|
||||||
// Constructors
|
// Constructors
|
||||||
MobiusZolotarevFermion(LatticeGaugeField &_Umu,
|
MobiusZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD b, RealD c,
|
RealD b, RealD c,
|
||||||
RealD lo, RealD hi) :
|
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
CayleyFermion5D(_Umu,
|
CayleyFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5)
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
|
|
||||||
{
|
{
|
||||||
RealD eps = lo/hi;
|
RealD eps = lo/hi;
|
||||||
@ -34,10 +37,10 @@ namespace Grid {
|
|||||||
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
|
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
|
||||||
assert(zdata->n==this->Ls);
|
assert(zdata->n==this->Ls);
|
||||||
|
|
||||||
std::cout << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
|
std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
|
||||||
|
|
||||||
// Call base setter
|
// Call base setter
|
||||||
this->CayleyFermion5D::SetCoefficientsZolotarev(hi,zdata,b,c);
|
this->SetCoefficientsZolotarev(hi,zdata,b,c);
|
||||||
|
|
||||||
Approx::zolotarev_free(zdata);
|
Approx::zolotarev_free(zdata);
|
||||||
}
|
}
|
||||||
|
@ -7,25 +7,28 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class OverlapWilsonCayleyTanhFermion : public MobiusFermion
|
template<class Impl>
|
||||||
|
class OverlapWilsonCayleyTanhFermion : public MobiusFermion<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonCayleyTanhFermion(LatticeGaugeField &_Umu,
|
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD scale) :
|
RealD scale,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
||||||
MobiusFermion(_Umu,
|
MobiusFermion<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale)
|
FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -7,25 +7,28 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion
|
template<class Impl>
|
||||||
|
class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
|
|
||||||
OverlapWilsonCayleyZolotarevFermion(LatticeGaugeField &_Umu,
|
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD lo, RealD hi) :
|
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
|
||||||
// b+c=1.0, b-c = 0 <=> b =c = 1/2
|
// b+c=1.0, b-c = 0 <=> b =c = 1/2
|
||||||
MobiusZolotarevFermion(_Umu,
|
MobiusZolotarevFermion<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi)
|
FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p)
|
||||||
|
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
@ -7,31 +7,34 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D
|
template<class Impl>
|
||||||
|
class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonContFracTanhFermion(LatticeGaugeField &_Umu,
|
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD scale) :
|
RealD scale,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
||||||
ContinuedFractionFermion5D(_Umu,
|
ContinuedFractionFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5)
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
{
|
{
|
||||||
assert((Ls&0x1)==1); // Odd Ls required
|
assert((this->Ls&0x1)==1); // Odd Ls required
|
||||||
int nrational=Ls-1;// Even rational order
|
int nrational=this->Ls-1;// Even rational order
|
||||||
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
|
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
|
||||||
SetCoefficientsTanh(zdata,scale);
|
this->SetCoefficientsTanh(zdata,scale);
|
||||||
Approx::zolotarev_free(zdata);
|
Approx::zolotarev_free(zdata);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -7,34 +7,36 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D
|
template<class Impl>
|
||||||
|
class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D<Impl>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonContFracZolotarevFermion(LatticeGaugeField &_Umu,
|
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD lo,RealD hi):
|
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
|
||||||
|
|
||||||
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
||||||
ContinuedFractionFermion5D(_Umu,
|
ContinuedFractionFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5)
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
{
|
{
|
||||||
assert((Ls&0x1)==1); // Odd Ls required
|
assert((this->Ls&0x1)==1); // Odd Ls required
|
||||||
|
|
||||||
int nrational=Ls;// Odd rational order
|
int nrational=this->Ls;// Odd rational order
|
||||||
RealD eps = lo/hi;
|
RealD eps = lo/hi;
|
||||||
|
|
||||||
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
|
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
|
||||||
SetCoefficientsZolotarev(hi,zdata);
|
this->SetCoefficientsZolotarev(hi,zdata);
|
||||||
Approx::zolotarev_free(zdata);
|
Approx::zolotarev_free(zdata);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -7,31 +7,34 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D
|
template<class Impl>
|
||||||
|
class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D<Impl>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonPartialFractionTanhFermion(LatticeGaugeField &_Umu,
|
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD scale) :
|
RealD scale,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
||||||
PartialFractionFermion5D(_Umu,
|
PartialFractionFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5)
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
{
|
{
|
||||||
assert((Ls&0x1)==1); // Odd Ls required
|
assert((this->Ls&0x1)==1); // Odd Ls required
|
||||||
int nrational=Ls-1;// Even rational order
|
int nrational=this->Ls-1;// Even rational order
|
||||||
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
|
Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
|
||||||
SetCoefficientsTanh(zdata,scale);
|
this->SetCoefficientsTanh(zdata,scale);
|
||||||
Approx::zolotarev_free(zdata);
|
Approx::zolotarev_free(zdata);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -7,34 +7,36 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D
|
template<class Impl>
|
||||||
|
class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D<Impl>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonPartialFractionZolotarevFermion(LatticeGaugeField &_Umu,
|
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD lo,RealD hi):
|
RealD lo,RealD hi,const ImplParams &p= ImplParams()):
|
||||||
|
|
||||||
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
// b+c=scale, b-c = 0 <=> b =c = scale/2
|
||||||
PartialFractionFermion5D(_Umu,
|
PartialFractionFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5)
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
{
|
{
|
||||||
assert((Ls&0x1)==1); // Odd Ls required
|
assert((this->Ls&0x1)==1); // Odd Ls required
|
||||||
|
|
||||||
int nrational=Ls;// Odd rational order
|
int nrational=this->Ls;// Odd rational order
|
||||||
RealD eps = lo/hi;
|
RealD eps = lo/hi;
|
||||||
|
|
||||||
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
|
Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
|
||||||
SetCoefficientsZolotarev(hi,zdata);
|
this->SetCoefficientsZolotarev(hi,zdata);
|
||||||
Approx::zolotarev_free(zdata);
|
Approx::zolotarev_free(zdata);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -2,12 +2,15 @@
|
|||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
void PartialFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
|
|
||||||
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
// this does both dag and undag but is trivial; make a common helper routing
|
// this does both dag and undag but is trivial; make a common helper routing
|
||||||
|
|
||||||
int sign = 1;
|
int sign = 1;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
DhopDir(psi,chi,dir,disp);
|
this->DhopDir(psi,chi,dir,disp);
|
||||||
|
|
||||||
int nblock=(Ls-1)/2;
|
int nblock=(Ls-1)/2;
|
||||||
for(int b=0;b<nblock;b++){
|
for(int b=0;b<nblock;b++){
|
||||||
@ -18,15 +21,16 @@ namespace Grid {
|
|||||||
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
|
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
|
||||||
|
|
||||||
}
|
}
|
||||||
void PartialFractionFermion5D::Meooe_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
|
||||||
{
|
{
|
||||||
// this does both dag and undag but is trivial; make a common helper routing
|
int Ls = this->Ls;
|
||||||
int sign = dag ? (-1) : 1;
|
int sign = dag ? (-1) : 1;
|
||||||
|
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
DhopEO(psi,chi,DaggerNo);
|
this->DhopEO(psi,chi,DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
DhopOE(psi,chi,DaggerNo);
|
this->DhopOE(psi,chi,DaggerNo);
|
||||||
}
|
}
|
||||||
|
|
||||||
int nblock=(Ls-1)/2;
|
int nblock=(Ls-1)/2;
|
||||||
@ -38,10 +42,12 @@ namespace Grid {
|
|||||||
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
|
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PartialFractionFermion5D::Mooee_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::Mooee_internal(const FermionField &psi, FermionField &chi,int dag)
|
||||||
{
|
{
|
||||||
// again dag and undag are trivially related
|
// again dag and undag are trivially related
|
||||||
int sign = dag ? (-1) : 1;
|
int sign = dag ? (-1) : 1;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
int nblock=(Ls-1)/2;
|
int nblock=(Ls-1)/2;
|
||||||
for(int b=0;b<nblock;b++){
|
for(int b=0;b<nblock;b++){
|
||||||
@ -69,11 +75,13 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PartialFractionFermion5D::MooeeInv_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag)
|
||||||
{
|
{
|
||||||
int sign = dag ? (-1) : 1;
|
int sign = dag ? (-1) : 1;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
LatticeFermion tmp(psi._grid);
|
FermionField tmp(psi._grid);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
//Linv
|
//Linv
|
||||||
@ -129,10 +137,12 @@ namespace Grid {
|
|||||||
axpby_ssp (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
|
axpby_ssp (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PartialFractionFermion5D::M_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, FermionField &chi,int dag)
|
||||||
{
|
{
|
||||||
LatticeFermion D(psi._grid);
|
FermionField D(psi._grid);
|
||||||
|
|
||||||
|
int Ls = this->Ls;
|
||||||
int sign = dag ? (-1) : 1;
|
int sign = dag ? (-1) : 1;
|
||||||
|
|
||||||
// For partial frac Hw case (b5=c5=1) chroma quirkily computes
|
// For partial frac Hw case (b5=c5=1) chroma quirkily computes
|
||||||
@ -186,7 +196,7 @@ namespace Grid {
|
|||||||
// ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H
|
// ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H
|
||||||
//
|
//
|
||||||
|
|
||||||
DW(psi,D,DaggerNo);
|
this->DW(psi,D,DaggerNo);
|
||||||
|
|
||||||
int nblock=(Ls-1)/2;
|
int nblock=(Ls-1)/2;
|
||||||
for(int b=0;b<nblock;b++){
|
for(int b=0;b<nblock;b++){
|
||||||
@ -217,61 +227,127 @@ namespace Grid {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RealD PartialFractionFermion5D::M (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
RealD PartialFractionFermion5D<Impl>::M (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
M_internal(in,out,DaggerNo);
|
M_internal(in,out,DaggerNo);
|
||||||
return norm2(out);
|
return norm2(out);
|
||||||
}
|
}
|
||||||
RealD PartialFractionFermion5D::Mdag (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
RealD PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
M_internal(in,out,DaggerYes);
|
M_internal(in,out,DaggerYes);
|
||||||
return norm2(out);
|
return norm2(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PartialFractionFermion5D::Meooe (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::Meooe (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
Meooe_internal(in,out,DaggerNo);
|
Meooe_internal(in,out,DaggerNo);
|
||||||
}
|
}
|
||||||
void PartialFractionFermion5D::MeooeDag (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MeooeDag (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
Meooe_internal(in,out,DaggerYes);
|
Meooe_internal(in,out,DaggerYes);
|
||||||
}
|
}
|
||||||
void PartialFractionFermion5D::Mooee (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::Mooee (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
Mooee_internal(in,out,DaggerNo);
|
Mooee_internal(in,out,DaggerNo);
|
||||||
}
|
}
|
||||||
void PartialFractionFermion5D::MooeeDag (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MooeeDag (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
Mooee_internal(in,out,DaggerYes);
|
Mooee_internal(in,out,DaggerYes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PartialFractionFermion5D::MooeeInv (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MooeeInv (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
MooeeInv_internal(in,out,DaggerNo);
|
MooeeInv_internal(in,out,DaggerNo);
|
||||||
}
|
}
|
||||||
void PartialFractionFermion5D::MooeeInvDag (const LatticeFermion &in, LatticeFermion &out)
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MooeeInvDag (const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
MooeeInv_internal(in,out,DaggerYes);
|
MooeeInv_internal(in,out,DaggerYes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PartialFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
|
|
||||||
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
FermionField D(V._grid);
|
||||||
|
|
||||||
|
int nblock=(Ls-1)/2;
|
||||||
|
for(int b=0;b<nblock;b++){
|
||||||
|
int s = 2*b;
|
||||||
|
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
|
||||||
|
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
|
||||||
|
}
|
||||||
|
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
|
||||||
|
|
||||||
|
this->DhopDeriv(mat,D,V,DaggerNo);
|
||||||
|
};
|
||||||
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
FermionField D(V._grid);
|
||||||
|
|
||||||
|
int nblock=(Ls-1)/2;
|
||||||
|
for(int b=0;b<nblock;b++){
|
||||||
|
int s = 2*b;
|
||||||
|
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
|
||||||
|
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
|
||||||
|
}
|
||||||
|
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
|
||||||
|
|
||||||
|
this->DhopDerivOE(mat,D,V,DaggerNo);
|
||||||
|
};
|
||||||
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
FermionField D(V._grid);
|
||||||
|
|
||||||
|
int nblock=(Ls-1)/2;
|
||||||
|
for(int b=0;b<nblock;b++){
|
||||||
|
int s = 2*b;
|
||||||
|
ag5xpby_ssp(D,-scale,U,0.0,U,s,s);
|
||||||
|
ag5xpby_ssp(D, scale,U,0.0,U,s+1,s+1);
|
||||||
|
}
|
||||||
|
ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
|
||||||
|
|
||||||
|
this->DhopDerivEO(mat,D,V,DaggerNo);
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
|
||||||
SetCoefficientsZolotarev(1.0/scale,zdata);
|
SetCoefficientsZolotarev(1.0/scale,zdata);
|
||||||
}
|
}
|
||||||
void PartialFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
|
||||||
|
|
||||||
// check on degree matching
|
// check on degree matching
|
||||||
// std::cout << Ls << " Ls"<<std::endl;
|
// std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
|
||||||
// std::cout << zdata->n << " - n"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->n << " - n"<<std::endl;
|
||||||
// std::cout << zdata->da << " -da "<<std::endl;
|
// std::cout<<GridLogMessage << zdata->da << " -da "<<std::endl;
|
||||||
// std::cout << zdata->db << " -db"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
|
||||||
// std::cout << zdata->dn << " -dn"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
|
||||||
// std::cout << zdata->dd << " -dd"<<std::endl;
|
// std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
assert(Ls == (2*zdata->da -1) );
|
assert(Ls == (2*zdata->da -1) );
|
||||||
|
|
||||||
// Part frac
|
// Part frac
|
||||||
// RealD R;
|
// RealD R;
|
||||||
R=(1+mass)/(1-mass);
|
R=(1+mass)/(1-mass);
|
||||||
dw_diag = (4.0-M5);
|
dw_diag = (4.0-this->M5);
|
||||||
|
|
||||||
// std::vector<RealD> p;
|
// std::vector<RealD> p;
|
||||||
// std::vector<RealD> q;
|
// std::vector<RealD> q;
|
||||||
@ -291,18 +367,22 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
PartialFractionFermion5D::PartialFractionFermion5D(LatticeGaugeField &_Umu,
|
template<class Impl>
|
||||||
GridCartesian &FiveDimGrid,
|
PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridCartesian &FourDimGrid,
|
||||||
RealD _mass,RealD M5) :
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
WilsonFermion5D(_Umu,
|
RealD _mass,RealD M5,
|
||||||
FiveDimGrid, FiveDimRedBlackGrid,
|
const ImplParams &p) :
|
||||||
FourDimGrid, FourDimRedBlackGrid,M5),
|
WilsonFermion5D<Impl>(_Umu,
|
||||||
|
FiveDimGrid, FiveDimRedBlackGrid,
|
||||||
|
FourDimGrid, FourDimRedBlackGrid,M5,p),
|
||||||
mass(_mass)
|
mass(_mass)
|
||||||
|
|
||||||
{
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
assert((Ls&0x1)==1); // Odd Ls required
|
assert((Ls&0x1)==1); // Odd Ls required
|
||||||
int nrational=Ls-1;
|
int nrational=Ls-1;
|
||||||
|
|
||||||
@ -321,6 +401,8 @@ namespace Grid {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FermOpTemplateInstantiate(PartialFractionFermion5D);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,41 +5,48 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class PartialFractionFermion5D : public WilsonFermion5D
|
template<class Impl>
|
||||||
|
class PartialFractionFermion5D : public WilsonFermion5D<Impl>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
const int part_frac_chroma_convention=1;
|
const int part_frac_chroma_convention=1;
|
||||||
|
|
||||||
void Meooe_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
|
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
|
||||||
void Mooee_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
|
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
|
||||||
void MooeeInv_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
|
void MooeeInv_internal(const FermionField &in, FermionField &out,int dag);
|
||||||
void M_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
|
void M_internal(const FermionField &in, FermionField &out,int dag);
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
virtual RealD M (const LatticeFermion &in, LatticeFermion &out);
|
virtual RealD M (const FermionField &in, FermionField &out);
|
||||||
virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out);
|
virtual RealD Mdag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
virtual void Meooe (const LatticeFermion &in, LatticeFermion &out);
|
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||||
virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MeooeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void Mooee (const LatticeFermion &in, LatticeFermion &out);
|
virtual void Mooee (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
|
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
|
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
|
||||||
virtual void Instantiatable(void) =0; // ensure no make-eee
|
virtual void Instantiatable(void) =0; // ensure no make-eee
|
||||||
|
|
||||||
// Efficient support for multigrid coarsening
|
// Efficient support for multigrid coarsening
|
||||||
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
PartialFractionFermion5D(LatticeGaugeField &_Umu,
|
PartialFractionFermion5D(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD M5);
|
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
@ -7,12 +7,14 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class ScaledShamirFermion : public MobiusFermion
|
template<class Impl>
|
||||||
|
class ScaledShamirFermion : public MobiusFermion<Impl>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
ScaledShamirFermion(LatticeGaugeField &_Umu,
|
ScaledShamirFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
@ -21,7 +23,7 @@ namespace Grid {
|
|||||||
RealD scale) :
|
RealD scale) :
|
||||||
|
|
||||||
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
|
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
|
||||||
MobiusFermion(_Umu,
|
MobiusFermion<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
|
@ -7,27 +7,29 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class ShamirZolotarevFermion : public MobiusZolotarevFermion
|
template<class Impl>
|
||||||
|
class ShamirZolotarevFermion : public MobiusZolotarevFermion<Impl>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
|
|
||||||
|
|
||||||
ShamirZolotarevFermion(LatticeGaugeField &_Umu,
|
ShamirZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,
|
RealD _mass,RealD _M5,
|
||||||
RealD lo, RealD hi) :
|
RealD lo, RealD hi,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
// b+c = 1; b-c = 1 => b=1, c=0
|
// b+c = 1; b-c = 1 => b=1, c=0
|
||||||
MobiusZolotarevFermion(_Umu,
|
MobiusZolotarevFermion<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi)
|
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p)
|
||||||
|
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user