Mirror of https://github.com/paboyle/Grid.git (synced 2025-04-09 21:50:45 +01:00)
Commit d9f2e2e06a

TODO (119 lines changed)
@@ -1,48 +1,48 @@
-- PseudoFermions
-
-Done: Cayley, Partial , ContFrac force terms.
-
-Done:
-- TwoFlavour
-- TwoFlavourEvenOdd
-- TwoFlavourRatio
-- TwoFlavourRatioEvenOdd
-
-Done:
-- OneFlavourRationalEvenOdd
-- OneFlavourRationalRatioEvenOdd
-- OneFlavourRationalRatio
-
-TODO:
-=> generalise to non-const EE
-=> Test DWF HMC
-=> Clean up HMC
-- Fix a threading bug that has been introduced and prevents HMC running hybrid OMP mode
-
-=> Integrators
-- Force Gradient
-- Multi-timescale looks broken and operating on single timescale for now.
-  Fix/debug/rewrite this
-- Sign of force term.
-- Prefer "RefreshInternal" or such like to "init" in naming
-- Rename "Ta" as too unclear
-
-- MacroMagic -> virtual reader class.
-
-- Link smearing/boundary conds; Policy class based implementation
+RECENT
+---------------
+
+- Clean up HMC -- DONE
+- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
+- Simplified the integrators a bit. -- DONE
+- Multi-timescale looks broken and operating on single timescale for now. -- DONE
+- pass GaugeField as template param. -- DONE
+- Reunitarise -- DONE
+- Force Gradient -- DONE
+- Prefer "RefreshInternal" or such like to "init" in naming -- DONE
+- Parallel io improvements -- DONE
+- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
+
+TODO:
+---------------
+Policies:
+* Link smearing/boundary conds; Policy class based implementation ; framework more in place
+* Support different boundary conditions (finite temp, chem. potential ... )
+* Support different fermion representations?
+- contained entirely within the integrator presently
+
+- Sign of force term.
+- Reversibility test.
+- Rename "Ta" as too unclear
+
+- Lanczos
 
 - Rectangle gauge actions.
   Iwasaki,
   Symanzik,
   ... etc...
 
-- Prepare multigrid for HMC.
-  Alternate setup schemes.
-
-- RNG filling from sparser grid, lower dim grid.
+- Prepare multigrid for HMC. - Alternate setup schemes.
+
+- Support for ILDG --- ugly, not done
+
+- Flavour matrices?
+
+- FFTnD ?
 
 ================================================================
-*** Hacks and bug fixes to clean up and Audits
+* Hacks and bug fixes to clean up and Audits
 ================================================================
 
 * Extract/merge/set cleanup ; too many variants; rationalise and call simpler ones
@@ -68,6 +68,8 @@ Insert/Extract
 * Thread scaling tests Xeon, XeonPhi
 
+Not sure of status of this -- reverify. Things are working nicely now though.
+
 * Make the Tensor types and Complex etc... play more nicely.
 - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
   QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
@@ -84,6 +86,37 @@ Insert/Extract
 template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
 pure scalar.
 
+======================================================================
+======================================================================
+======================================================================
+======================================================================
+Done: Cayley, Partial , ContFrac force terms.
+
+DONE
+- PseudoFermions
+=> generalise to non-const EE ; likely defer (??) (NOT DONE)
+Done:
+- TwoFlavour
+- TwoFlavourEvenOdd
+- TwoFlavourRatio
+- TwoFlavourRatioEvenOdd
+
+Done:
+- OneFlavourRationalEvenOdd
+- OneFlavourRationalRatioEvenOdd
+- OneFlavourRationalRatio
+
+Done
+=> Test DWF HMC
+- Fix a threading bug that has been introduced and prevents HMC running hybrid OMP mode
+
+Done:
+- RNG filling from sparser grid, lower dim grid.
+
+DONE
+- MacroMagic -> virtual reader class.
+
 *** Expression template engine: -- DONE
 
 [ -- Norm2(expression) problem: introduce norm2 unary op, or Introduce conversion automatic from expression to Lattice<vobj>
@@ -103,18 +136,10 @@ Insert/Extract
 
 * CovariantShift support -----Use a class to store gauge field? (parallel transport?)
 
-* Parallel io improvements
-- optional parallel MPI2 IO
-- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
-
-* Support for ILDG
-
-* Support different boundary conditions (finite temp, chem. potential ... )
-
-* Support different fermion representations?
-
-Actions -- coherent framework for implementing actions and their forces.
+Actions
+-- coherent framework for implementing actions and their forces.
 
+DONE
 * Fermion
 - Wilson
 - Clover
@@ -122,6 +147,7 @@ Actions -- coherent framework for implementing actions and their forces.
 - Mobius
 - z-Mobius
 
 Algorithms (lots of reuse/port from BFM)
 * LinearOperator
 * LinearSolver
@@ -139,19 +165,11 @@ Algorithms (lots of reuse/port from BFM)
 * Integrators, leapfrog, omelyan, force gradient etc...
 * etc..
 
-* Gauge
-- Wilson, symanzik, iwasaki
-
-* Flavour matrices?
+Done
 
 * Pauli, SU subgroup, etc..
 
 * su3 exponentiation & log etc.. [Jamie's code?]
 
-* TaProj
-
-* FFTnD ?
 
 ======================================================================================================
 FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
 ======================================================================================================
@@ -184,7 +202,6 @@ FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
 - lib/communicator
 - lib/algorithms
 - lib/qcd
-future
 - lib/io/   -- GridLog, GridIn, GridErr, GridDebug, GridMessage
 - lib/qcd/actions
 - lib/qcd/measurements
benchmarks/Benchmark_memory_asynch.cc (new file, 84 lines)
@@ -0,0 +1,84 @@
#include <Grid.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  const int Nvec=4;
  typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
  typedef iVector<vReal,Nvec> Vec;

  std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
  std::vector<int> mpi_layout  = GridDefaultMpi();

  int threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"bytes/thread"<<"\t\t\t"<<"GB/s"<<"\t\t\t"<<"GB/s per thread"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  const int lmax = 16536*16;
  for(int lat=4;lat<=lmax;lat*=2){

    int Nloop=lmax*128*4/lat;

    std::vector<int> latt_size ({2*mpi_layout[0],2*mpi_layout[1],4*mpi_layout[2],lat*mpi_layout[3]});

    GridCartesian Grid(latt_size,simd_layout,mpi_layout);

    int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]*threads;

    Vec tsum; tsum = zero;

    GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();

    std::vector<double> stop(threads);
    Vector<Vec> sum(threads);

    std::vector<LatticeVec> x(threads,&Grid);
    for(int t=0;t<threads;t++){
      random(pRNG,x[t]);
    }

    double start=usecond();
PARALLEL_FOR_LOOP
    for(int t=0;t<threads;t++){

      sum[t] = x[t]._odata[0];
      for(int i=0;i<Nloop;i++){
        for(auto ss=x[t].begin();ss<x[t].end();ss++){
          sum[t] = sum[t]+x[t]._odata[ss];
        }
      }
      stop[t]=usecond();
    }

    double max_stop=stop[0];
    double min_stop=stop[0];

    for(int t=0;t<threads;t++){
      tsum+=sum[t];
      if ( stop[t]<min_stop ) min_stop=stop[t];
      if ( stop[t]>max_stop ) max_stop=stop[t];
    }

    double max_time = (max_stop-start)/Nloop*1000;
    double min_time = (min_stop-start)/Nloop*1000;

    double bytes=vol*Nvec*sizeof(Real);
    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/threads<<"\t\t"<<bytes/max_time<<" - "<< bytes/min_time<<"\t\t"<<bytes/min_time/threads <<std::endl;

  }

  Grid_finalize();
}
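For orientation, the GB/s columns printed by this benchmark follow from dividing the bytes touched per sweep by the per-sweep time in nanoseconds (one byte per nanosecond is one GB/s), assuming usecond() returns microseconds as its use throughout these benchmarks suggests. A minimal standalone sketch of that conversion, with illustrative names that are not part of the benchmark itself:

  #include <cstdio>

  // Hypothetical helper: bytes moved per sweep and elapsed microseconds over
  // Nloop sweeps, converted exactly as the prints above do
  // (time_ns = elapsed_us / Nloop * 1000; GB/s = bytes / time_ns).
  double bandwidth_GBps(double bytes_per_sweep, double elapsed_us, int Nloop) {
    double time_ns = elapsed_us / Nloop * 1000.0; // nanoseconds per sweep
    return bytes_per_sweep / time_ns;             // 1 byte/ns == 1 GB/s
  }

  int main() {
    // Example: 1 MB per sweep, 1000 sweeps in 200000 us -> 200 us/sweep -> 5 GB/s
    std::printf("%.2f GB/s\n", bandwidth_GBps(1.0e6, 200000.0, 1000));
  }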
@@ -10,144 +10,158 @@ int main (int argc, char ** argv)
 
   const int Nvec=8;
   typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
+  typedef iVector<vReal,Nvec> Vec;
 
-  int Nloop=1000;
+  Vec rn = zero;
 
   std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
   std::vector<int> mpi_layout  = GridDefaultMpi();
 
   int threads = GridThread::GetThreads();
   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking fused AXPY bandwidth ; sizeof(Real) "<<sizeof(Real)<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
-  for(int lat=4;lat<=32;lat+=4){
+  uint64_t lmax=44;
+#define NLOOP (100*lmax*lmax*lmax*lmax/vol)
+
+  for(int lat=4;lat<=lmax;lat+=4){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
     GridCartesian Grid(latt_size,simd_layout,mpi_layout);
 
-    //GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
-
-    LatticeVec z(&Grid); //random(pRNG,z);
-    LatticeVec x(&Grid); //random(pRNG,x);
-    LatticeVec y(&Grid); //random(pRNG,y);
+    uint64_t Nloop=NLOOP;
+
+    GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+
+    LatticeVec z(&Grid); random(pRNG,z);
+    LatticeVec x(&Grid); random(pRNG,x);
+    LatticeVec y(&Grid); random(pRNG,y);
     double a=2.0;
 
     double start=usecond();
     for(int i=0;i<Nloop;i++){
-      // inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
       axpy(z,a,x,y);
+      x._odata[0]=z._odata[0]; // serial loop dependence to prevent optimise
+      y._odata[4]=z._odata[4];
     }
     double stop=usecond();
     double time = (stop-start)/Nloop*1000;
 
     double flops=vol*Nvec*2;// mul,add
     double bytes=3*vol*Nvec*sizeof(Real);
-    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
+    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
   }
 
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
-  for(int lat=4;lat<=32;lat+=4){
+  for(int lat=4;lat<=lmax;lat+=4){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
     GridCartesian Grid(latt_size,simd_layout,mpi_layout);
 
-    //GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
-
-    LatticeVec z(&Grid); //random(pRNG,z);
-    LatticeVec x(&Grid); //random(pRNG,x);
-    LatticeVec y(&Grid); //random(pRNG,y);
+    GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+
+    LatticeVec z(&Grid); random(pRNG,z);
+    LatticeVec x(&Grid); random(pRNG,x);
+    LatticeVec y(&Grid); random(pRNG,y);
     double a=2.0;
 
+    uint64_t Nloop=NLOOP;
+
     double start=usecond();
     for(int i=0;i<Nloop;i++){
       z=a*x-y;
+      x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away
+      y._odata[4]=z._odata[4];
     }
     double stop=usecond();
     double time = (stop-start)/Nloop*1000;
 
     double flops=vol*Nvec*2;// mul,add
     double bytes=3*vol*Nvec*sizeof(Real);
-    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
+    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
   }
 
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking SCALE bandwidth"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
 
-  for(int lat=4;lat<=32;lat+=4){
+  for(int lat=4;lat<=lmax;lat+=4){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+    uint64_t Nloop=NLOOP;
     GridCartesian Grid(latt_size,simd_layout,mpi_layout);
 
-    //GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
-
-    LatticeVec z(&Grid); //random(pRNG,z);
-    LatticeVec x(&Grid); //random(pRNG,x);
-    LatticeVec y(&Grid); //random(pRNG,y);
+    GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+
+    LatticeVec z(&Grid); random(pRNG,z);
+    LatticeVec x(&Grid); random(pRNG,x);
+    LatticeVec y(&Grid); random(pRNG,y);
     RealD a=2.0;
 
     double start=usecond();
     for(int i=0;i<Nloop;i++){
       z=a*x;
+      x._odata[0]=z._odata[0]*2.0;
     }
     double stop=usecond();
     double time = (stop-start)/Nloop*1000;
 
     double bytes=2*vol*Nvec*sizeof(Real);
     double flops=vol*Nvec*1;// mul
-    std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
+    std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
   }
 
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
-  for(int lat=4;lat<=32;lat+=4){
+  for(int lat=4;lat<=lmax;lat+=4){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+    uint64_t Nloop=NLOOP;
     GridCartesian Grid(latt_size,simd_layout,mpi_layout);
 
-    //GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
-
-    LatticeVec z(&Grid); //random(pRNG,z);
-    LatticeVec x(&Grid); //random(pRNG,x);
-    LatticeVec y(&Grid); //random(pRNG,y);
+    GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+
+    LatticeVec z(&Grid); random(pRNG,z);
+    LatticeVec x(&Grid); random(pRNG,x);
+    LatticeVec y(&Grid); random(pRNG,y);
     RealD a=2.0;
-    ComplexD nn;
+    Real nn;
 
     double start=usecond();
     for(int i=0;i<Nloop;i++){
       nn=norm2(x);
+      vsplat(x._odata[0]._internal[0],nn);
     }
     double stop=usecond();
    double time = (stop-start)/Nloop*1000;
 
     double bytes=vol*Nvec*sizeof(Real);
     double flops=vol*Nvec*2;// mul,add
-    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
+    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
   }
 
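Two things are worth noting about the lines added in each timing loop above. The NLOOP macro scales the repeat count inversely with the lattice volume, so every lattice size runs a comparable amount of total work. The extra writes of one output element back into an input (x._odata[0]=z._odata[0]; y._odata[4]=z._odata[4];) create a loop-carried dependence so the compiler cannot hoist or elide the repeated kernel. A minimal standalone sketch of that second trick, with illustrative names and plain arrays rather than Grid lattices:

  #include <vector>
  #include <cstdio>

  int main() {
    const int N = 1 << 20, Nloop = 100;
    std::vector<double> x(N, 1.0), y(N, 2.0), z(N, 0.0);
    double a = 2.0;

    for (int i = 0; i < Nloop; i++) {
      for (int s = 0; s < N; s++) z[s] = a * x[s] + y[s]; // the kernel being timed
      // Feed one result back into an input: iteration i+1 now depends on
      // iteration i, so the optimiser cannot collapse the Nloop repetitions.
      x[0] = z[0];
      y[4] = z[4];
    }
    std::printf("z[0]=%g\n", z[0]); // use the result so the loop is not dead code
  }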
@@ -1,5 +1,5 @@
 
-bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
+bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
 
 
 Benchmark_comms_SOURCES=Benchmark_comms.cc
@@ -10,6 +10,10 @@ Benchmark_dwf_SOURCES=Benchmark_dwf.cc
 Benchmark_dwf_LDADD=-lGrid
 
+Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
+Benchmark_memory_asynch_LDADD=-lGrid
+
 Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
 Benchmark_memory_bandwidth_LDADD=-lGrid
 
configure (vendored, 123 lines changed)
@@ -1384,9 +1384,9 @@ Optional Features:
   --disable-dependency-tracking
                           speeds up one-time build
   --disable-openmp        do not use OpenMP
-  --enable-simd=SSE4|AVX|AVX2|AVX512|MIC
+  --enable-simd=SSE4|AVX|AVX2|AVX512|IMCI
                           Select instructions to be SSE4.0, AVX 1.0, AVX
-                          2.0+FMA, AVX 512, MIC
+                          2.0+FMA, AVX 512, IMCI
   --enable-precision=single|double
                           Select default word size of Real
   --enable-comms=none|mpi Select communications
@@ -6359,105 +6359,15 @@ fi
 done
 
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __gmpf_init in -lgmp" >&5
-$as_echo_n "checking for __gmpf_init in -lgmp... " >&6; }
-if ${ac_cv_lib_gmp___gmpf_init+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-lgmp  $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char __gmpf_init ();
-int
-main ()
-{
-return __gmpf_init ();
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-  ac_cv_lib_gmp___gmpf_init=yes
-else
-  ac_cv_lib_gmp___gmpf_init=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gmp___gmpf_init" >&5
-$as_echo "$ac_cv_lib_gmp___gmpf_init" >&6; }
-if test "x$ac_cv_lib_gmp___gmpf_init" = xyes; then :
-  cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBGMP 1
-_ACEOF
-
-  LIBS="-lgmp $LIBS"
-
-else
-  as_fn_error $? "GNU Multiple Precision GMP library was not found in your system.
-Please install or provide the correct path to your installation
-Info at: http://www.gmplib.org" "$LINENO" 5
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpfr_init in -lmpfr" >&5
-$as_echo_n "checking for mpfr_init in -lmpfr... " >&6; }
-if ${ac_cv_lib_mpfr_mpfr_init+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-lmpfr  $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char mpfr_init ();
-int
-main ()
-{
-return mpfr_init ();
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-  ac_cv_lib_mpfr_mpfr_init=yes
-else
-  ac_cv_lib_mpfr_mpfr_init=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpfr_mpfr_init" >&5
-$as_echo "$ac_cv_lib_mpfr_mpfr_init" >&6; }
-if test "x$ac_cv_lib_mpfr_mpfr_init" = xyes; then :
-  cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBMPFR 1
-_ACEOF
-
-  LIBS="-lmpfr $LIBS"
-
-else
-  as_fn_error $? "GNU Multiple Precision MPFR library was not found in your system.
-Please install or provide the correct path to your installation
-Info at: http://www.mpfr.org/" "$LINENO" 5
-fi
-
+#AC_CHECK_LIB([gmp],[__gmpf_init],,
+#  [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
+#Please install or provide the correct path to your installation
+#Info at: http://www.gmplib.org)])
+
+#AC_CHECK_LIB([mpfr],[mpfr_init],,
+#  [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
+#Please install or provide the correct path to your installation
+#Info at: http://www.mpfr.org/)])
+
 # Check whether --enable-simd was given.
 if test "${enable_simd+set}" = set; then :
@@ -6504,13 +6414,20 @@ $as_echo "#define AVX2 1" >>confdefs.h
 $as_echo "$as_me: WARNING: Your processor does not support AVX2 instructions" >&2;}
     fi
   ;;
-  AVX512|MIC)
-    echo Configuring for AVX512 and MIC
+  AVX512)
+    echo Configuring for AVX512
 
 $as_echo "#define AVX512 1" >>confdefs.h
 
     supported="cross compilation"
   ;;
+  IMCI)
+    echo Configuring for IMCI
+
+$as_echo "#define IMCI 1" >>confdefs.h
+
+    supported="cross compilation"
+  ;;
   NEONv8)
     echo Configuring for experimental ARMv8a support
 
@@ -6533,7 +6450,7 @@ esac
 if test "${enable_precision+set}" = set; then :
   enableval=$enable_precision; ac_PRECISION=${enable_precision}
 else
-  ac_PRECISION=single
+  ac_PRECISION=double
 fi
 
 case ${ac_PRECISION} in
configure.ac (33 lines changed)
@@ -55,18 +55,18 @@ echo :::::::::::::::::::::::::::::::::::::::::::
 
 AC_CHECK_FUNCS([gettimeofday])
 
-AC_CHECK_LIB([gmp],[__gmpf_init],,
-  [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
-Please install or provide the correct path to your installation
-Info at: http://www.gmplib.org)])
+#AC_CHECK_LIB([gmp],[__gmpf_init],,
+#  [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
+#Please install or provide the correct path to your installation
+#Info at: http://www.gmplib.org)])
 
-AC_CHECK_LIB([mpfr],[mpfr_init],,
-  [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
-Please install or provide the correct path to your installation
-Info at: http://www.mpfr.org/)])
+#AC_CHECK_LIB([mpfr],[mpfr_init],,
+#  [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
+#Please install or provide the correct path to your installation
+#Info at: http://www.mpfr.org/)])
 
-AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
-  [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
+AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|IMCI],\
+  [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
   [ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
 
 supported=no
@@ -99,9 +99,14 @@ case ${ac_SIMD} in
       AC_MSG_WARN([Your processor does not support AVX2 instructions])
     fi
     ;;
-  AVX512|MIC)
-    echo Configuring for AVX512 and MIC
-    AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] )
+  AVX512)
+    echo Configuring for AVX512
+    AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
+    supported="cross compilation"
+    ;;
+  IMCI)
+    echo Configuring for IMCI
+    AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
     supported="cross compilation"
     ;;
   NEONv8)
@@ -118,7 +123,7 @@ case ${ac_SIMD} in
     ;;
 esac
 
-AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=single])
+AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
 case ${ac_PRECISION} in
   single)
     echo default precision is single
@@ -17,6 +17,9 @@
 #include <algorithms/iterative/ConjugateGradientMultiShift.h>
 
+// Lanczos support
+//#include <algorithms/iterative/MatrixUtils.h>
+//#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
+
 #include <algorithms/CoarsenedMatrix.h>
 
@@ -72,7 +72,6 @@ operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return t
 
 template<typename _Tp>  inline bool
 operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
 
 }; // namespace Grid
 #endif
@@ -6,7 +6,7 @@
 /* AVX2 Intrinsics */
 #undef AVX2
 
-/* AVX512 Intrinsics for Knights Corner */
+/* AVX512 Intrinsics for Knights Landing */
 #undef AVX512
 
 /* EMPTY_SIMD only for DEBUGGING */
@@ -56,12 +56,6 @@
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
-/* Define to 1 if you have the `gmp' library (-lgmp). */
-#undef HAVE_LIBGMP
-
-/* Define to 1 if you have the `mpfr' library (-lmpfr). */
-#undef HAVE_LIBMPFR
-
 /* Define to 1 if you have the <malloc.h> header file. */
 #undef HAVE_MALLOC_H
 
@@ -116,6 +110,9 @@
 /* Define to 1 if you have the <unistd.h> header file. */
 #undef HAVE_UNISTD_H
 
+/* IMCI Intrinsics for Knights Corner */
+#undef IMCI
+
 /* NEON ARMv8 Experimental support */
 #undef NEONv8
 
@@ -30,7 +30,7 @@
 ///////////////////
 // Grid headers
 ///////////////////
-#include <MacroMagic.h>
+#include <serialisation/Serialisation.h>
 #include <Config.h>
 #include <Timer.h>
 #include <Log.h>
@@ -45,6 +45,7 @@
 #include <Stencil.h>
 #include <Algorithms.h>
 #include <qcd/QCD.h>
+#include <parallelIO/BinaryIO.h>
 #include <parallelIO/NerscIO.h>
 
 #include <Init.h>
@@ -177,7 +177,8 @@ void Grid_init(int *argc,char ***argv)
     Grid_quiesce_nodes();
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
-    WilsonFermionStatic::HandOptDslash=1;
+    QCD::WilsonFermionStatic::HandOptDslash=1;
+    QCD::WilsonFermion5DStatic::HandOptDslash=1;
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
     LebesgueOrder::UseLebesgueOrder=1;
@@ -1,4 +1,4 @@
Old:
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Config.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./MacroMagic.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/NerscIO.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h 
./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
New:
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Config.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h 
./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h

Old:
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/hmc/integrators/Integrator.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
New:
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
@@ -13,6 +13,11 @@
 
 typedef uint32_t Integer;
 
+#define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D))
+#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) ((A<<7)|(B<<6)|(C<<5)|(D<<4)|(E<<3)|(F<<2)|(G<<4)|(H))
+#define _MM_SELECT_FOUR_TWO (A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)
+#define _MM_SELECT_TWO_TWO  (A,B)     _MM_SELECT_FOUR_TWO(0,0,A,B)
+
 namespace Grid {
 
   typedef  float  RealF;
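These selector macros pack per-lane source indices into a single immediate of the kind the x86 shuffle/permute intrinsics expect (two bits per lane for the four-lane form). A small standalone check of the four-lane variant, with an illustrative helper name that is not part of the header above:

  #include <cstdint>

  // Same packing as _MM_SELECT_FOUR_FOUR above: two bits per lane,
  // lane 3 in the top bits, lane 0 in the bottom bits.
  constexpr uint32_t select_four_four(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
    return (a << 6) | (b << 4) | (c << 2) | d;
  }

  // The in-order selection (3,2,1,0) packs to 0xE4, the same value as
  // _MM_SHUFFLE(3,2,1,0), i.e. picking lanes in their original order.
  static_assert(select_four_four(3, 2, 1, 0) == 0xE4, "identity permutation");

  int main() { return 0; }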
@@ -17,7 +17,8 @@
 
 #include <stddef.h>
 
-#include <algorithms/approx/bigfloat.h>
+//#include <algorithms/approx/bigfloat.h>
+#include <algorithms/approx/bigfloat_double.h>
 
 #define JMAX 10000 //Maximum number of iterations of Newton's approximation
 #define SUM_MAX 10 // Maximum number of terms in exponential
388
lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
388
lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
@ -0,0 +1,388 @@
|
|||||||
|
#ifndef GRID_IRL_H
|
||||||
|
#define GRID_IRL_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Base classes for iterative processes based on operators
|
||||||
|
// single input vec, single output vec.
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class ImplicitlyRestartedLanczos {
|
||||||
|
public:
|
||||||
|
|
||||||
|
int Niter;
|
||||||
|
int Nk;
|
||||||
|
int Np;
|
||||||
|
RealD enorm;
|
||||||
|
RealD vthr;
|
||||||
|
|
||||||
|
LinearOperatorBase<Field> &_Linop;
|
||||||
|
OperatorFunction<Field> &_poly;
|
||||||
|
|
||||||
|
ImplicitlyRestartedLanczos(
|
||||||
|
LinearOperatorBase<Field> &Linop,
|
||||||
|
OperatorFunction<Field> & poly,
|
||||||
|
int _Nk,
|
||||||
|
int _Np,
|
||||||
|
RealD _enorm,
|
||||||
|
RealD _vthrs,
|
||||||
|
int _Niter) :
|
||||||
|
_Linop(Linop),
|
||||||
|
_poly(poly),
|
||||||
|
Nk(_Nk),
|
||||||
|
Np(_Np),
|
||||||
|
enorm(_enorm),
|
||||||
|
vthr(_vthrs)
|
||||||
|
{
|
||||||
|
vthr=_vthrs;
|
||||||
|
Niter=_Niter;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void step(Vector<RealD>& lmda,
|
||||||
|
Vector<RealD>& lmdb,
|
||||||
|
Vector<Field>& evec,
|
||||||
|
Field& f,int Nm,int k)
|
||||||
|
{
|
||||||
|
|
||||||
|
assert( k< Nm );
|
||||||
|
|
||||||
|
w = opr_->mult(evec[k]);
|
||||||
|
|
||||||
|
if(k==0){ // Initial step
|
||||||
|
|
||||||
|
RealD wnorm= w*w;
|
||||||
|
std::cout<<"wnorm="<<wnorm<<std::endl;
|
||||||
|
|
||||||
|
RealD alph = evec[k] * w;
|
||||||
|
w -= alph * evec[k];
|
||||||
|
lmd[k] = alph;
|
||||||
|
|
||||||
|
RealD beta = w * w;
|
||||||
|
beta = sqrt(beta);
|
||||||
|
RealD betar = 1.0/beta;
|
||||||
|
|
||||||
|
evec[k+1] = betar * w;
|
||||||
|
lme[k] = beta;
|
||||||
|
|
||||||
|
} else { // Iteration step
|
||||||
|
|
||||||
|
w -= lme[k-1] * evec[k-1];
|
||||||
|
|
||||||
|
RealD alph = evec[k] * w;
|
||||||
|
w -= alph * evec[k];
|
||||||
|
|
||||||
|
RealD beta = w * w;
|
||||||
|
beta = sqrt(beta);
|
||||||
|
RealD betar = 1.0/beta;
|
||||||
|
w *= betar;
|
||||||
|
|
||||||
|
lmd[k] = alph;
|
||||||
|
lme[k] = beta;
|
||||||
|
orthogonalize(w,evec,k);
|
||||||
|
|
||||||
|
if(k < Nm-1) evec[k+1] = w;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void qr_decomp(Vector<RealD>& lmda,
|
||||||
|
Vector<RealD>& lmdb,
|
||||||
|
int Nk,
|
||||||
|
int Nm,
|
||||||
|
Vector<RealD>& Qt,
|
||||||
|
RealD Dsft,
|
||||||
|
int kmin,
|
||||||
|
int kmax)
|
||||||
|
{
|
||||||
|
int k = kmin-1;
|
||||||
|
RealD x;
|
||||||
|
|
||||||
|
RealD Fden = 1.0/sqrt((lmd[k]-Dsh)*(lmd[k]-Dsh) +lme[k]*lme[k]);
|
||||||
|
RealD c = ( lmd[k] -Dsh) *Fden;
|
||||||
|
RealD s = -lme[k] *Fden;
|
||||||
|
|
||||||
|
RealD tmpa1 = lmd[k];
|
||||||
|
RealD tmpa2 = lmd[k+1];
|
||||||
|
RealD tmpb = lme[k];
|
||||||
|
|
||||||
|
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||||
|
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||||
|
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||||
|
x = -s*lme[k+1];
|
||||||
|
lme[k+1] = c*lme[k+1];
|
||||||
|
|
||||||
|
for(int i=0; i<Nk; ++i){
|
||||||
|
RealD Qtmp1 = Qt[i+Nm*k ];
|
||||||
|
RealD Qtmp2 = Qt[i+Nm*(k+1)];
|
||||||
|
Qt[i+Nm*k ] = c*Qtmp1 - s*Qtmp2;
|
||||||
|
Qt[i+Nm*(k+1)] = s*Qtmp1 + c*Qtmp2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Givens transformations
|
||||||
|
for(int k = kmin; k < kmax-1; ++k){
|
||||||
|
RealD Fden = 1.0/sqrt( x*x +lme[k-1]*lme[k-1]);
|
||||||
|
RealD c = lme[k-1]*Fden;
|
||||||
|
RealD s = - x*Fden;
|
||||||
|
|
||||||
|
RealD tmpa1 = lmd[k];
|
||||||
|
RealD tmpa2 = lmd[k+1];
|
||||||
|
RealD tmpb = lme[k];
|
||||||
|
|
||||||
|
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||||
|
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||||
|
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||||
|
lme[k-1] = c*lme[k-1] -s*x;
|
||||||
|
|
||||||
|
if(k != kmax-2){
|
||||||
|
x = -s*lme[k+1];
|
||||||
|
lme[k+1] = c*lme[k+1];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i=0; i<Nk; ++i){
|
||||||
|
RealD Qtmp1 = Qt[i+Nm*k ];
|
||||||
|
RealD Qtmp2 = Qt[i+Nm*(k+1)];
|
||||||
|
Qt[i+Nm*k ] = c*Qtmp1 -s*Qtmp2;
|
||||||
|
Qt[i+Nm*(k+1)] = s*Qtmp1 +c*Qtmp2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
  void diagonalize(Vector<RealD>& lmd,
                   Vector<RealD>& lme,
                   int Nm2,
                   int Nm,
                   Vector<RealD>& Qt)
  {
    int Niter = 100*Nm;
    int kmin = 1;
    int kmax = Nm2;
    // (this should be more sophisticated)

    for(int iter=0; iter<Niter; ++iter){

      // determination of 2x2 leading submatrix
      RealD dsub = lmd[kmax-1]-lmd[kmax-2];
      RealD dd   = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
      RealD Dsh  = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
      // (Dsh: shift)

      // transformation
      qr_decomp(lmd,lme,Nm2,Nm,Qt,Dsh,kmin,kmax);

      // Convergence criterion (redefinition of kmin and kmax)
      for(int j=kmax-1; j>= kmin; --j){
        RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
        if(fabs(lme[j-1])+dds > dds){
          kmax = j+1;
          goto continued;
        }
      }
      Niter = iter;
      return;

    continued:
      for(int j=0; j<kmax-1; ++j){
        RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
        if(fabs(lme[j])+dds > dds){
          kmin = j+1;
          break;
        }
      }
    }
    std::cout << "[QL method] Error - Too many iterations: "<<Niter<<"\n";
    abort();
  }

  void orthogonalize(Field& w,
                     const Vector<Field>& evec,
                     int k)
  {
    // Schmidt orthogonalization

    size_t size = w.size();
    assert(size%2 ==0);

    std::slice re(0,size/2,2);
    std::slice im(1,size/2,2);

    for(int j=0; j<k; ++j){
      RealD prdr = evec[j]*w;
      RealD prdi = evec[j].im_prod(w);

      valarray<RealD> evr(evec[j][re]);
      valarray<RealD> evi(evec[j][im]);

      w.add(re, -prdr*evr +prdi*evi);
      w.add(im, -prdr*evi -prdi*evr);
    }
  }

  void calc(Vector<RealD>& lmd,
            Vector<Field>& evec,
            const Field& b,
            int& Nsbt,
            int& Nconv)
  {
    const size_t fsize = evec[0].size();

    Nconv = -1;
    Nsbt  = 0;
    int Nm = Nk_+Np_;

    std::cout << " -- Nk = " << Nk_ << " Np = "<< Np_ << endl;
    std::cout << " -- Nm = " << Nm << endl;
    std::cout << " -- size of lmd  = " << lmd.size()  << endl;
    std::cout << " -- size of evec = " << evec.size() << endl;

    assert(Nm < evec.size() && Nm < lmd.size());

    vector<RealD> lme(Nm);
    vector<RealD> lmd2(Nm);
    vector<RealD> lme2(Nm);
    vector<RealD> Qt(Nm*Nm);
    vector<int>   Iconv(Nm);

    vector<Field> B(Nm);
    for(int k=0; k<Nm; ++k) B[k].resize(fsize);

    Field f(fsize);
    Field v(fsize);

    int k1 = 1;
    int k2 = Nk_;
    int kconv = 0;

    int Kdis  = 0;
    int Kthrs = 0;
    RealD beta_k;

    // Set initial vector
    evec[0] = 1.0;
    RealD vnorm = evec[0]*evec[0];
    evec[0] = 1.0/sqrt(vnorm);
    // (uniform vector)

    // Initial Nk steps
    for(int k=0; k<k2; ++k) step(lmd,lme,evec,f,Nm,k);

    // Restarting loop begins
    for(int iter = 0; iter<Niter_; ++iter){
      std::cout<<"\n iteration = "<< iter << endl;

      int Nm2 = Nm - kconv;
      for(int k=k2; k<Nm; ++k) step(lmd,lme,evec,f,Nm,k);
      f *= lme[Nm-1];

      // getting eigenvalues
      for(int k=0; k<Nm2; ++k){
        lmd2[k] = lmd[k+k1-1];
        lme2[k] = lme[k+k1-1];
      }
      setUnit_Qt(Nm,Qt);
      diagonalize(lmd2,lme2,Nm2,Nm,Qt);
      // sorting
      sort_->push(lmd2,Nm);

      // Implicitly shifted QR transformations
      setUnit_Qt(Nm,Qt);
      for(int ip=k2; ip<Nm; ++ip)
        qr_decomp(lmd,lme,Nm,Nm,Qt,lmd2[ip],k1,Nm);

      for(int i=0; i<(Nk_+1); ++i) B[i] = 0.0;

      for(int j=k1-1; j<k2+1; ++j){
        for(int k=0; k<Nm; ++k){
          B[j] += Qt[k+Nm*j] * evec[k];
        }
      }
      for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];

      // Compressed vector f and beta(k2)
      f *= Qt[Nm-1+Nm*(k2-1)];
      f += lme[k2-1] * evec[k2];
      beta_k = f * f;
      beta_k = sqrt(beta_k);
      std::cout<<" beta(k) = "<<beta_k<<endl;

      RealD betar = 1.0/beta_k;
      evec[k2] = betar * f;
      lme[k2-1] = beta_k;

      // Convergence test
      for(int k=0; k<Nm2; ++k){
        lmd2[k] = lmd[k];
        lme2[k] = lme[k];
      }
      setUnit_Qt(Nm,Qt);
      diagonalize(lmd2,lme2,Nk_,Nm,Qt);

      for(int k = 0; k<Nk_; ++k) B[k]=0.0;

      for(int j = 0; j<Nk_; ++j){
        for(int k = 0; k<Nk_; ++k){
          B[j] += Qt[k+j*Nm] * evec[k];
        }
      }
      Kdis  = 0;
      Kthrs = 0;

      std::cout << setiosflags(ios_base::scientific);
      for(int i=0; i<Nk_; ++i){
        v = opr_->mult(B[i]);
        //std::cout<<"vv="<<v*v<<std::endl;

        RealD vnum = B[i]*v;
        RealD vden = B[i]*B[i];
        lmd2[i] = vnum/vden;
        v -= lmd2[i]*B[i];
        RealD vv = v*v;

        std::cout << " [" << setw(3)<< setiosflags(ios_base::right) <<i<<"] ";
        std::cout << setw(25)<< setiosflags(ios_base::left)<< lmd2[i];
        std::cout <<" "<< setw(25)<< setiosflags(ios_base::right)<< vv<< endl;

        if(vv<enorm_){
          Iconv[Kdis] = i;
          ++Kdis;
          if(sort_->saturated(lmd2[i],vthr)) ++Kthrs;
          std::cout<<"Kthrs="<<Kthrs<<endl;
        }
      }  // i-loop end
      std::cout << resetiosflags(ios_base::scientific);

      std::cout<<" #modes converged: "<<Kdis<<endl;

      if(Kthrs > 0){
        // (there is a converged eigenvalue larger than Vthrs.)
        Nconv = iter;
        goto converged;
      }
    } // end of iter loop

    std::cout<<"\n NOT converged.\n";
    abort();

  converged:
    // Sorting

    lmd.clear();
    evec.clear();
    for(int i=0; i<Kdis; ++i){
      lmd.push_back(lmd2[Iconv[i]]);
      evec.push_back(B[Iconv[i]]);
    }
    sort_->push(lmd,evec,Kdis);
    Nsbt = Kdis - Kthrs;

    std::cout << "\n Converged\n Summary :\n";
    std::cout << " -- Iterations = "<< Nconv  << "\n";
    std::cout << " -- beta(k)    = "<< beta_k << "\n";
    std::cout << " -- Kdis       = "<< Kdis   << "\n";
    std::cout << " -- Nsbt       = "<< Nsbt   << "\n";
  }
};
}
#endif
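For orientation, the lmd/lme updates inside qr_decomp above are nothing more than a 2x2 orthogonal similarity transform of the tridiagonal matrix by a Givens rotation. A small standalone check that the closed-form expressions agree with the explicit rotation (illustration only, not part of this commit):

#include <cassert>
#include <cmath>

int main() {
  // 2x2 symmetric block [[a1, b], [b, a2]] and an arbitrary rotation angle
  double a1 = 3.0, a2 = 1.0, b = 0.5, theta = 0.3;
  double c = cos(theta), s = sin(theta);

  // Closed-form updates as written in qr_decomp above
  double d1 = c*c*a1 + s*s*a2 - 2.0*c*s*b;
  double d2 = s*s*a1 + c*c*a2 + 2.0*c*s*b;
  double e  = c*s*(a1-a2) + (c*c-s*s)*b;

  // Explicit similarity transform M = R A R^T with R = [[c,-s],[s,c]]
  double m00 = c*(c*a1 - s*b) - s*(c*b - s*a2);
  double m11 = s*(s*a1 + c*b) + c*(s*b + c*a2);
  double m01 = s*(c*a1 - s*b) + c*(c*b - s*a2);

  assert(fabs(d1-m00) < 1e-12 && fabs(d2-m11) < 1e-12 && fabs(e-m01) < 1e-12);
  return 0;
}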
lib/algorithms/iterative/MatrixUtils.h  (new file, 48 lines)
@@ -0,0 +1,48 @@
#ifndef GRID_MATRIX_UTILS_H
#define GRID_MATRIX_UTILS_H

namespace Grid {

  namespace MatrixUtils {

    template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
      N=A.size(); assert(N>0);
      M=A[0].size();
      for(int i=0;i<N;i++){
        assert(A[i].size()==M);
      }
    }

    template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
    {
      int M;
      Size(A,N,M);
      assert(N==M);
    }

    template<class T> inline void Fill(Matrix<T>& A,T & val)
    {
      int N,M;
      Size(A,N,M);
      for(int i=0;i<N;i++){
      for(int j=0;j<M;j++){
        A[i][j]=val;
      }}
    }
    template<class T> inline void Diagonal(Matrix<T>& A,T & val)
    {
      int N;
      SizeSquare(A,N);
      for(int i=0;i<N;i++){
        A[i][i]=val;
      }
    }
    template<class T> inline void Identity(Matrix<T>& A)
    {
      Fill(A,0.0);
      Diagonal(A,1.0);
    }

  };
}
#endif
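A minimal usage sketch of these helpers (illustration only; it assumes the Grid.h umbrella header makes the Matrix alias and alignedAllocator visible, as introduced in this commit):

#include <cassert>
#include <Grid.h>   // assumed umbrella header pulling in Matrix<> and MatrixUtils

int main(int argc,char **argv) {
  using namespace Grid;

  // Matrix<T> is std::vector< std::vector<T,alignedAllocator<T> > >
  Matrix<double> A(4, std::vector<double,alignedAllocator<double> >(4, 0.0));

  int N;
  MatrixUtils::SizeSquare(A,N);   // asserts the matrix is square
  assert(N==4);

  double zero = 0.0, one = 1.0;
  MatrixUtils::Fill(A,zero);      // A = 0
  MatrixUtils::Diagonal(A,one);   // unit diagonal, i.e. the identity
  assert(A[2][2]==1.0 && A[0][3]==0.0);
  return 0;
}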
@@ -87,6 +87,14 @@ class CartesianCommunicator {
                          void *recv,
                          int recv_from_rank,
                          int bytes);

    void RecvFrom(void *recv,
                  int recv_from_rank,
                  int bytes);
    void SendTo(void *xmit,
                int xmit_to_rank,
                int bytes);

    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                             void *xmit,
                             int xmit_to_rank,
@@ -81,13 +81,30 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
  SendToRecvFromComplete(reqs);
}

void CartesianCommunicator::RecvFrom(void *recv,
                                     int from,
                                     int bytes)
{
  MPI_Status stat;
  int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
  assert(ierr==0);
}
void CartesianCommunicator::SendTo(void *xmit,
                                   int dest,
                                   int bytes)
{
  int rank = _processor; // used for tag; must know who it comes from
  int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
  assert(ierr==0);
}

// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                void *xmit,
                                                int dest,
                                                void *recv,
                                                int from,
                                                int bytes)
{
  MPI_Request xrq;
  MPI_Request rrq;
@@ -100,7 +117,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
  list.push_back(xrq);
  list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
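The new point-to-point helpers tag each message with the sender's rank: SendTo posts MPI_Send with tag _processor, and RecvFrom posts a blocking MPI_Recv whose tag is the source rank, so a SendTo on one rank must be matched by a RecvFrom naming that rank on the other side. A minimal standalone MPI sketch of the same pairing (illustration only, outside Grid):

#include <mpi.h>
#include <cassert>

int main(int argc,char **argv) {
  MPI_Init(&argc,&argv);
  int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  int buf = 0;
  if (rank==0) {
    buf = 42;
    // tag the message with the sending rank, as CartesianCommunicator::SendTo does
    MPI_Send(&buf,sizeof(buf),MPI_CHAR,1,0,MPI_COMM_WORLD);
  } else if (rank==1) {
    MPI_Status stat;
    // receive from rank 0 using the source rank as the tag, as RecvFrom does
    MPI_Recv(&buf,sizeof(buf),MPI_CHAR,0,0,MPI_COMM_WORLD,&stat);
    assert(buf==42);
  }
  MPI_Finalize();
  return 0;
}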
@@ -22,6 +22,20 @@ void CartesianCommunicator::GlobalSum(double &){}
void CartesianCommunicator::GlobalSum(uint32_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}

void CartesianCommunicator::RecvFrom(void *recv,
                                     int recv_from_rank,
                                     int bytes)
{
  assert(0);
}
void CartesianCommunicator::SendTo(void *xmit,
                                   int xmit_to_rank,
                                   int bytes)
{
  assert(0);
}

// Basic Halo comms primitive -- should never call in single node
void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                           int dest,
@@ -178,6 +178,7 @@ GridUnopClass(UnaryConj,conjugate(a));
GridUnopClass(UnaryTrace,trace(a));
GridUnopClass(UnaryTranspose,transpose(a));
GridUnopClass(UnaryTa,Ta(a));
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
GridUnopClass(UnaryReal,real(a));
GridUnopClass(UnaryImag,imag(a));
GridUnopClass(UnaryToReal,toReal(a));
@@ -290,6 +291,7 @@ GRID_DEF_UNOP(conjugate,UnaryConj);
GRID_DEF_UNOP(trace,UnaryTrace);
GRID_DEF_UNOP(transpose,UnaryTranspose);
GRID_DEF_UNOP(Ta,UnaryTa);
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
GRID_DEF_UNOP(real,UnaryReal);
GRID_DEF_UNOP(imag,UnaryImag);
GRID_DEF_UNOP(toReal,UnaryToReal);
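With the unary op registered above, ProjectOnGroup participates in the lattice expression templates just like trace or Ta; a one-line usage sketch (hypothetical, assuming a LatticeGaugeField U already exists):

// Hypothetical usage: re-project every link of an existing gauge field U onto the group
U = ProjectOnGroup(U);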
@@ -29,6 +29,9 @@ extern int GridCshiftPermuteMap[4][16];
class LatticeBase {};
class LatticeExpressionBase {};

template<class T> using Vector = std::vector<T,alignedAllocator<T> >;                 // Aligned allocator??
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;   // Aligned allocator??

template <typename Op, typename T1>
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
 public:
@@ -59,7 +62,7 @@ public:

    GridBase *_grid;
    int checkerboard;
    Vector<vobj> _odata;

    // to pthread need a computable loop where loop induction is not required
    int begin(void) { return 0;};
lib/parallelIO/BinaryIO.h  (new file, 513 lines)
@@ -0,0 +1,513 @@
#ifndef GRID_BINARY_IO_H
#define GRID_BINARY_IO_H

#ifdef HAVE_ENDIAN_H
#include <endian.h>
#endif

#include <arpa/inet.h>
#include <algorithm>

// 64bit endian swap is a portability pain
#ifndef __has_builtin        // Optional of course.
#define __has_builtin(x) 0   // Compatibility with non-clang compilers.
#endif

#if HAVE_DECL_BE64TOH
#undef  Grid_ntohll
#define Grid_ntohll be64toh
#endif

#if HAVE_DECL_NTOHLL
#undef  Grid_ntohll
#define Grid_ntohll ntohll
#endif

#ifndef Grid_ntohll

#if BYTE_ORDER == BIG_ENDIAN

#define Grid_ntohll(A) (A)

#else

#if __has_builtin(__builtin_bswap64)
#define Grid_ntohll(A) __builtin_bswap64(A)
#else
#error
#endif

#endif

#endif

namespace Grid {

  // A little helper
  inline void removeWhitespace(std::string &key)
  {
    key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
  }

  class BinaryIO {

  public:

    // Network is big endian
    static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
    static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
    static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
    static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}

    static inline void be32toh_v(void *file_object,uint32_t bytes)
    {
      uint32_t * f = (uint32_t *)file_object;
      for(int i=0;i*sizeof(uint32_t)<bytes;i++){
        f[i] = ntohl(f[i]);
      }
    }

    // LE must Swap and switch to host
    static inline void le32toh_v(void *file_object,uint32_t bytes)
    {
      uint32_t *fp = (uint32_t *)file_object;
      uint32_t f;

      for(int i=0;i*sizeof(uint32_t)<bytes;i++){
        f = fp[i];
        // got network order and the network to host
        f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
        fp[i] = ntohl(f);
      }
    }

    // BE is same as network
    static inline void be64toh_v(void *file_object,uint32_t bytes)
    {
      uint64_t * f = (uint64_t *)file_object;
      for(int i=0;i*sizeof(uint64_t)<bytes;i++){
        f[i] = Grid_ntohll(f[i]);
      }
    }

    // LE must swap and switch;
    static inline void le64toh_v(void *file_object,uint32_t bytes)
    {
      uint64_t *fp = (uint64_t *)file_object;
      uint64_t f,g;

      for(int i=0;i*sizeof(uint64_t)<bytes;i++){
        f = fp[i];
        // got network order and the network to host
        g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
        g = g << 32;
        f = f >> 32;
        g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
        fp[i] = Grid_ntohll(g);
      }
    }

    template<class vobj,class fobj,class munger> static inline void Uint32Checksum(Lattice<vobj> lat,munger munge,uint32_t &csum)
    {
      typedef typename vobj::scalar_object sobj;
      GridBase *grid = lat._grid ;
      std::cout <<GridLogMessage<< "Uint32Checksum "<<norm2(lat)<<std::endl;
      sobj siteObj;
      fobj fileObj;

      csum = 0;
      std::vector<int> lcoor;
      for(int l=0;l<grid->lSites();l++){
        grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
        peekLocalSite(siteObj,lat,lcoor);
        munge(siteObj,fileObj,csum);
      }
      grid->GlobalSum(csum);
    }

    static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
    {
      for(int i=0;i*sizeof(uint32_t)<buf_size_bytes;i++){
        csum=csum+buf[i];
      }
    }

    template<class vobj,class fobj,class munger>
    static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = Umu._grid;

      std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      // Find the location of each site and send to primary node
      // Take loop order from Chroma; defines loop order now that NERSC doc no longer
      // available (how short sighted is that?)
      std::ifstream fin(file,std::ios::binary|std::ios::in);
      fin.seekg(offset);

      Umu = zero;
      uint32_t csum=0;
      fobj file_object;
      sobj munged;

      for(int t=0;t<grid->_fdimensions[3];t++){
      for(int z=0;z<grid->_fdimensions[2];z++){
      for(int y=0;y<grid->_fdimensions[1];y++){
      for(int x=0;x<grid->_fdimensions[0];x++){

        std::vector<int> site({x,y,z,t});

        if ( grid->IsBoss() ) {
          fin.read((char *)&file_object,sizeof(file_object));

          if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
          if(ieee32)    le32toh_v((void *)&file_object,sizeof(file_object));
          if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
          if(ieee64)    le64toh_v((void *)&file_object,sizeof(file_object));

          munge(file_object,munged,csum);
        }
        // The boss who read the file has their value poked
        pokeSite(munged,Umu,site);
      }}}}
      return csum;
    }

    template<class vobj,class fobj,class munger>
    static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = Umu._grid;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      //////////////////////////////////////////////////
      // Serialise through node zero
      //////////////////////////////////////////////////
      std::cout<< GridLogMessage<< "Serial write I/O "<< file<<std::endl;

      std::ofstream fout;
      if ( grid->IsBoss() ) {
        fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
        fout.seekp(offset);
      }

      uint32_t csum=0;
      fobj file_object;
      sobj unmunged;
      for(int t=0;t<grid->_fdimensions[3];t++){
      for(int z=0;z<grid->_fdimensions[2];z++){
      for(int y=0;y<grid->_fdimensions[1];y++){
      for(int x=0;x<grid->_fdimensions[0];x++){

        std::vector<int> site({x,y,z,t});
        // peek & write
        peekSite(unmunged,Umu,site);

        munge(unmunged,file_object,csum);

        if ( grid->IsBoss() ) {

          if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
          if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object));
          if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
          if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object));

          fout.write((char *)&file_object,sizeof(file_object));
        }
      }}}}

      return csum;
    }

    template<class vobj,class fobj,class munger>
    static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
    {
      typedef typename vobj::scalar_object sobj;

      GridBase *grid = Umu._grid;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      // Take into account block size of parallel file systems want about
      // 4-16MB chunks.
      // Ideally one reader/writer per xy plane and read these contiguously
      // with comms from nominated I/O nodes.
      std::ifstream fin;

      int nd = grid->_ndimension;
      std::vector<int> parallel(nd,1);
      std::vector<int> ioproc  (nd);
      std::vector<int> start(nd);
      std::vector<int> range(nd);

      for(int d=0;d<nd;d++){
        assert(grid->CheckerBoarded(d) == 0);
      }

      uint64_t slice_vol = 1;

      int IOnode = 1;
      for(int d=0;d<grid->_ndimension;d++) {

        if ( d==0 ) parallel[d] = 0;
        if (parallel[d]) {
          range[d] = grid->_ldimensions[d];
          start[d] = grid->_processor_coor[d]*range[d];
          ioproc[d]= grid->_processor_coor[d];
        } else {
          range[d] = grid->_gdimensions[d];
          start[d] = 0;
          ioproc[d]= 0;

          if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
        }
        slice_vol = slice_vol * range[d];
      }

      {
        uint32_t tmp = IOnode;
        grid->GlobalSum(tmp);
        std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
        for(int d=0;d<grid->_ndimension;d++){
          std::cout<< range[d];
          if( d< grid->_ndimension-1 )
            std::cout<< " x ";
        }
        std::cout << std::endl;
      }

      int myrank = grid->ThisRank();
      int iorank = grid->RankFromProcessorCoor(ioproc);

      if ( IOnode ) {
        fin.open(file,std::ios::binary|std::ios::in);
      }

      //////////////////////////////////////////////////////////
      // Find the location of each site and send to primary node
      // Take loop order from Chroma; defines loop order now that NERSC doc no longer
      // available (how short sighted is that?)
      //////////////////////////////////////////////////////////
      Umu = zero;
      uint32_t csum=0;
      fobj fileObj;
      sobj siteObj;

      // need to implement these loops in Nd independent way with a lexico conversion
      for(int tlex=0;tlex<slice_vol;tlex++){

        std::vector<int> tsite(nd); // temporary mixed up site
        std::vector<int> gsite(nd);
        std::vector<int> lsite(nd);
        std::vector<int> iosite(nd);

        grid->CoorFromIndex(tsite,tlex,range);

        for(int d=0;d<nd;d++){
          lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
          gsite[d] = tsite[d]+start[d];               // global site
        }

        /////////////////////////
        // Get the rank of owner of data
        /////////////////////////
        int rank, o_idx,i_idx, g_idx;
        grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
        grid->GlobalCoorToGlobalIndex(gsite,g_idx);

        ////////////////////////////////
        // iorank reads from the seek
        ////////////////////////////////
        if (myrank == iorank) {

          fin.seekg(offset+g_idx*sizeof(fileObj));
          fin.read((char *)&fileObj,sizeof(fileObj));

          if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
          if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj));

          munge(fileObj,siteObj,csum);

          if ( rank != myrank ) {
            grid->SendTo((void *)&siteObj,rank,sizeof(siteObj));
          } else {
            pokeLocalSite(siteObj,Umu,lsite);
          }

        } else {
          if ( myrank == rank ) {
            grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj));
            pokeLocalSite(siteObj,Umu,lsite);
          }
        }
        grid->Barrier(); // necessary?
      }

      grid->GlobalSum(csum);

      return csum;
    }

    //////////////////////////////////////////////////////////
    // Parallel writer
    //////////////////////////////////////////////////////////
    template<class vobj,class fobj,class munger>
    static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
    {
      typedef typename vobj::scalar_object sobj;
      GridBase *grid = Umu._grid;

      int ieee32big = (format == std::string("IEEE32BIG"));
      int ieee32    = (format == std::string("IEEE32"));
      int ieee64big = (format == std::string("IEEE64BIG"));
      int ieee64    = (format == std::string("IEEE64"));

      int nd = grid->_ndimension;
      for(int d=0;d<nd;d++){
        assert(grid->CheckerBoarded(d) == 0);
      }

      std::vector<int> parallel(nd,1);
      std::vector<int> ioproc  (nd);
      std::vector<int> start(nd);
      std::vector<int> range(nd);

      uint64_t slice_vol = 1;

      int IOnode = 1;

      for(int d=0;d<grid->_ndimension;d++) {

        if ( d==0 ) parallel[d] = 0;

        if (parallel[d]) {
          range[d] = grid->_ldimensions[d];
          start[d] = grid->_processor_coor[d]*range[d];
          ioproc[d]= grid->_processor_coor[d];
        } else {
          range[d] = grid->_gdimensions[d];
          start[d] = 0;
          ioproc[d]= 0;

          if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
        }

        slice_vol = slice_vol * range[d];
      }

      {
        uint32_t tmp = IOnode;
        grid->GlobalSum(tmp);
        std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
        for(int d=0;d<grid->_ndimension;d++){
          std::cout<< range[d];
          if( d< grid->_ndimension-1 )
            std::cout<< " x ";
        }
        std::cout << std::endl;
      }

      int myrank = grid->ThisRank();
      int iorank = grid->RankFromProcessorCoor(ioproc);

      // Take into account block size of parallel file systems want about
      // 4-16MB chunks.
      // Ideally one reader/writer per xy plane and read these contiguously
      // with comms from nominated I/O nodes.
      std::ofstream fout;
      if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out);

      //////////////////////////////////////////////////////////
      // Find the location of each site and send to primary node
      // Take loop order from Chroma; defines loop order now that NERSC doc no longer
      // available (how short sighted is that?)
      //////////////////////////////////////////////////////////

      uint32_t csum=0;
      fobj fileObj;
      sobj siteObj;

      // need to implement these loops in Nd independent way with a lexico conversion
      for(int tlex=0;tlex<slice_vol;tlex++){

        std::vector<int> tsite(nd); // temporary mixed up site
        std::vector<int> gsite(nd);
        std::vector<int> lsite(nd);
        std::vector<int> iosite(nd);

        grid->CoorFromIndex(tsite,tlex,range);

        for(int d=0;d<nd;d++){
          lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
          gsite[d] = tsite[d]+start[d];               // global site
        }

        /////////////////////////
        // Get the rank of owner of data
        /////////////////////////
        int rank, o_idx,i_idx, g_idx;
        grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
        grid->GlobalCoorToGlobalIndex(gsite,g_idx);

        ////////////////////////////////
        // iorank writes from the seek
        ////////////////////////////////
        if (myrank == iorank) {

          if ( rank != myrank ) {
            grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj));
          } else {
            peekLocalSite(siteObj,Umu,lsite);
          }

          munge(siteObj,fileObj,csum);

          if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
          if(ieee32)    htole32_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
          if(ieee64)    htole64_v((void *)&fileObj,sizeof(fileObj));

          fout.seekp(offset+g_idx*sizeof(fileObj));
          fout.write((char *)&fileObj,sizeof(fileObj));

        } else {
          if ( myrank == rank ) {
            peekLocalSite(siteObj,Umu,lsite);
            grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj));
          }
        }
        grid->Barrier(); // necessary// or every 16 packets to rate throttle??
      }

      grid->GlobalSum(csum);

      return csum;
    }

  };

}

#endif
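A minimal standalone check (illustration only, not part of the commit) of the byte-swap expression used in le32toh_v/le64toh_v above: it simply reverses the four bytes of a 32-bit word, so applying it twice is the identity, and following it with ntohl converts a little-endian file word to host order on either kind of host.

#include <cassert>
#include <cstdint>

// Same expression as in BinaryIO::le32toh_v, pulled out for a quick sanity check
static uint32_t bswap32(uint32_t f) {
  return ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24);
}

int main() {
  assert(bswap32(0x11223344u) == 0x44332211u);            // bytes reversed
  assert(bswap32(bswap32(0xDEADBEEFu)) == 0xDEADBEEFu);    // swapping twice is the identity
  return 0;
}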
@ -7,57 +7,23 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
#ifdef HAVE_ENDIAN_H
|
#include <unistd.h>
|
||||||
#include <endian.h>
|
#include <sys/utsname.h>
|
||||||
#endif
|
#include <pwd.h>
|
||||||
|
|
||||||
|
|
||||||
#include <arpa/inet.h>
|
|
||||||
|
|
||||||
// 64bit endian swap is a portability pain
|
|
||||||
#ifndef __has_builtin // Optional of course.
|
|
||||||
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if HAVE_DECL_BE64TOH
|
|
||||||
#undef Grid_ntohll
|
|
||||||
#define Grid_ntohll be64toh
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if HAVE_DECL_NTOHLL
|
|
||||||
#undef Grid_ntohll
|
|
||||||
#define Grid_ntohll ntohll
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef Grid_ntohll
|
|
||||||
|
|
||||||
#if BYTE_ORDER == BIG_ENDIAN
|
|
||||||
|
|
||||||
#define Grid_ntohll(A) (A)
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#if __has_builtin(__builtin_bswap64)
|
|
||||||
#define Grid_ntohll(A) __builtin_bswap64(A)
|
|
||||||
#else
|
|
||||||
#error
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
using namespace QCD;
|
using namespace Grid;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Some data types for intermediate storage
|
// Some data types for intermediate storage
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
|
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
|
||||||
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
|
||||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
||||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
||||||
|
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// header specification/interpretation
|
// header specification/interpretation
|
||||||
@ -86,50 +52,173 @@ class NerscField {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// Bit and Physical Checksumming and QA of data
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
inline void NerscGrid(GridBase *grid,NerscField &header)
|
||||||
|
{
|
||||||
|
assert(grid->_ndimension==4);
|
||||||
|
for(int d=0;d<4;d++) {
|
||||||
|
header.dimension[d] = grid->_fdimensions[d];
|
||||||
|
}
|
||||||
|
for(int d=0;d<4;d++) {
|
||||||
|
header.boundary[d] = std::string("PERIODIC");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class GaugeField>
|
||||||
|
inline void NerscStatistics(GaugeField & data,NerscField &header)
|
||||||
|
{
|
||||||
|
header.link_trace=Grid::QCD::WilsonLoops<GaugeField>::linkTrace(data);
|
||||||
|
header.plaquette =Grid::QCD::WilsonLoops<GaugeField>::avgPlaquette(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void NerscMachineCharacteristics(NerscField &header)
|
||||||
|
{
|
||||||
|
// Who
|
||||||
|
struct passwd *pw = getpwuid (getuid());
|
||||||
|
if (pw) header.creator = std::string(pw->pw_name);
|
||||||
|
|
||||||
|
// When
|
||||||
|
std::time_t t = std::time(nullptr);
|
||||||
|
std::tm tm = *std::localtime(&t);
|
||||||
|
std::ostringstream oss;
|
||||||
|
// oss << std::put_time(&tm, "%c %Z");
|
||||||
|
header.creation_date = oss.str();
|
||||||
|
header.archive_date = header.creation_date;
|
||||||
|
|
||||||
|
// What
|
||||||
|
struct utsname name; uname(&name);
|
||||||
|
header.creator_hardware = std::string(name.nodename)+"-";
|
||||||
|
header.creator_hardware+= std::string(name.machine)+"-";
|
||||||
|
header.creator_hardware+= std::string(name.sysname)+"-";
|
||||||
|
header.creator_hardware+= std::string(name.release);
|
||||||
|
|
||||||
|
}
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// Utilities ; these are QCD aware
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
|
||||||
|
{
|
||||||
|
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
|
||||||
|
}
|
||||||
|
inline void reconstruct3(LorentzColourMatrix & cm)
|
||||||
|
{
|
||||||
|
const int x=0;
|
||||||
|
const int y=1;
|
||||||
|
const int z=2;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
|
||||||
|
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
|
||||||
|
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct NerscSimpleMunger{
|
||||||
|
|
||||||
|
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
for(int i=0;i<3;i++){
|
||||||
|
for(int j=0;j<3;j++){
|
||||||
|
out(mu)()(i,j) = in(mu)()(i,j);
|
||||||
|
}}}
|
||||||
|
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct NerscSimpleUnmunger{
|
||||||
|
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
for(int i=0;i<Nc;i++){
|
||||||
|
for(int j=0;j<Nc;j++){
|
||||||
|
out(mu)()(i,j) = in(mu)()(i,j);
|
||||||
|
}}}
|
||||||
|
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct Nersc3x2munger{
|
||||||
|
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||||
|
|
||||||
|
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
for(int i=0;i<2;i++){
|
||||||
|
for(int j=0;j<3;j++){
|
||||||
|
out(mu)()(i,j) = in(mu)(i)(j);
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
reconstruct3(out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class fobj,class sobj>
|
||||||
|
struct Nersc3x2unmunger{
|
||||||
|
|
||||||
|
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||||
|
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
for(int i=0;i<2;i++){
|
||||||
|
for(int j=0;j<3;j++){
|
||||||
|
out(mu)(i)(j) = in(mu)()(i,j);
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Write and read from fstream; comput header offset for payload
|
// Write and read from fstream; comput header offset for payload
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
inline unsigned int writeNerscHeader(NerscField &field,std::string file)
|
class NerscIO : public BinaryIO {
|
||||||
{
|
public:
|
||||||
std::ofstream fout(file,std::ios::out);
|
|
||||||
|
static inline unsigned int writeHeader(NerscField &field,std::string file)
|
||||||
|
{
|
||||||
|
std::ofstream fout(file,std::ios::out);
|
||||||
|
|
||||||
fout.seekp(0,std::ios::beg);
|
fout.seekp(0,std::ios::beg);
|
||||||
fout << "BEGIN_HEADER" << std::endl;
|
fout << "BEGIN_HEADER" << std::endl;
|
||||||
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
|
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
|
||||||
fout << "DATATYPE = " << field.data_type << std::endl;
|
fout << "DATATYPE = " << field.data_type << std::endl;
|
||||||
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
|
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
|
||||||
|
|
||||||
for(int i=0;i<4;i++){
|
for(int i=0;i<4;i++){
|
||||||
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
|
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
|
||||||
}
|
}
|
||||||
// just to keep the space and write it later
|
// just to keep the space and write it later
|
||||||
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
|
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
|
||||||
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
|
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
|
||||||
for(int i=0;i<4;i++){
|
for(int i=0;i<4;i++){
|
||||||
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
|
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
|
||||||
}
|
}
|
||||||
fout << "CHECKSUM = "<< std::hex << std::setw(16) << 0 << field.checksum << std::endl;
|
|
||||||
|
|
||||||
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
|
fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::endl;
|
||||||
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
|
fout << std::dec;
|
||||||
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
|
|
||||||
fout << "CREATOR = " << field.creator << std::endl;
|
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
|
||||||
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
|
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
|
||||||
fout << "CREATION_DATE = " << field.creation_date << std::endl;
|
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
|
||||||
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
|
fout << "CREATOR = " << field.creator << std::endl;
|
||||||
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
|
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
|
||||||
fout << "END_HEADER" << std::endl;
|
fout << "CREATION_DATE = " << field.creation_date << std::endl;
|
||||||
field.data_start = fout.tellp();
|
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
|
||||||
return field.data_start;
|
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
|
||||||
|
fout << "END_HEADER" << std::endl;
|
||||||
|
field.data_start = fout.tellp();
|
||||||
|
return field.data_start;
|
||||||
}
|
}
|
||||||
|
|
||||||
// A little helper
|
|
||||||
inline void removeWhitespace(std::string &key)
|
|
||||||
{
|
|
||||||
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
|
|
||||||
}
|
|
||||||
// for the header-reader
|
// for the header-reader
|
||||||
inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
|
||||||
{
|
{
|
||||||
int offset=0;
|
int offset=0;
|
||||||
std::map<std::string,std::string> header;
|
std::map<std::string,std::string> header;
|
||||||
@ -163,7 +252,6 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
|||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// chomp the values
|
// chomp the values
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
|
|
||||||
field.hdr_version = header["HDR_VERSION"];
|
field.hdr_version = header["HDR_VERSION"];
|
||||||
field.data_type = header["DATATYPE"];
|
field.data_type = header["DATATYPE"];
|
||||||
field.storage_format = header["STORAGE_FORMAT"];
|
field.storage_format = header["STORAGE_FORMAT"];
|
||||||
@ -200,314 +288,21 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// Utilities
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
inline void reconstruct3(LorentzColourMatrix & cm)
|
|
||||||
{
|
|
||||||
const int x=0;
|
|
||||||
const int y=1;
|
|
||||||
const int z=2;
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
|
|
||||||
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
|
|
||||||
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void inline be32toh_v(void *file_object,uint32_t bytes)
|
|
||||||
{
|
|
||||||
uint32_t * f = (uint32_t *)file_object;
|
|
||||||
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
|
||||||
f[i] = ntohl(f[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void inline le32toh_v(void *file_object,uint32_t bytes)
|
|
||||||
{
|
|
||||||
uint32_t *fp = (uint32_t *)file_object;
|
|
||||||
|
|
||||||
uint32_t f;
|
|
||||||
|
|
||||||
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
|
||||||
f = fp[i];
|
|
||||||
// got network order and the network to host
|
|
||||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
|
||||||
fp[i] = ntohl(f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void inline be64toh_v(void *file_object,uint32_t bytes)
|
|
||||||
{
|
|
||||||
uint64_t * f = (uint64_t *)file_object;
|
|
||||||
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
|
||||||
f[i] = Grid_ntohll(f[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void inline le64toh_v(void *file_object,uint32_t bytes)
|
|
||||||
{
|
|
||||||
uint64_t *fp = (uint64_t *)file_object;
|
|
||||||
uint64_t f,g;
|
|
||||||
|
|
||||||
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
|
||||||
f = fp[i];
|
|
||||||
// got network order and the network to host
|
|
||||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
|
||||||
g = g << 32;
|
|
||||||
f = f >> 32;
|
|
||||||
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
|
||||||
fp[i] = ntohl(g);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size,uint32_t &csum)
|
|
||||||
{
|
|
||||||
for(int i=0;i*sizeof(uint32_t)<buf_size;i++){
|
|
||||||
csum=csum+buf[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct NerscSimpleMunger{
|
|
||||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<3;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)()(i,j) = in(mu)()(i,j);
|
|
||||||
}}}
|
|
||||||
|
|
||||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
|
||||||
};
|
|
||||||
};
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct NerscSimpleUnmunger{
|
|
||||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<3;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)()(i,j) = in(mu)()(i,j);
|
|
||||||
}}}
|
|
||||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct Nersc3x2munger{
|
|
||||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
|
||||||
|
|
||||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<2;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)()(i,j) = in(mu)(i)(j);
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
reconstruct3(out);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class fobj,class sobj>
|
|
||||||
struct Nersc3x2unmunger{
|
|
||||||
|
|
||||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
|
||||||
|
|
||||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
|
||||||
for(int i=0;i<2;i++){
|
|
||||||
for(int j=0;j<3;j++){
|
|
||||||
out(mu)(i)(j) = in(mu)()(i,j);
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Template wizardry to map types to strings for NERSC in an extensible way
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
template<class vobj> struct NerscDataType {
|
|
||||||
static void DataType (std::string &str) { str = std::string("4D_BINARY_UNKNOWN"); };
|
|
||||||
static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG"); };
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct NerscDataType<iColourMatrix<ComplexD> > {
|
|
||||||
static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
|
|
||||||
static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG");};
|
|
||||||
};
|
|
||||||
|
|
||||||
template<> struct NerscDataType<iColourMatrix<ComplexF> > {
|
|
||||||
static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
|
|
||||||
static void FloatingPoint(std::string &str) { str = std::string("IEEE32BIG");};
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// Bit and Physical Checksumming and QA of data
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
/*
|
|
||||||
template<class vobj> inline uint32_t NerscChecksum(Lattice<vobj> & data)
|
|
||||||
{
|
|
||||||
uint32_t sum;
|
|
||||||
for(int ss=0;ss<data._grid->Osites();ss++){
|
|
||||||
uint32_t *iptr = (uint32_t *)& data._odata[0] ;
|
|
||||||
for(int i=0;i<sizeof(vobj);i+=sizeof(uint32_t)){
|
|
||||||
sum=sum+iptr[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
data._grid->globalSum(sum);
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
template<class vobj> inline void NerscPhysicalCharacteristics(Lattice<vobj> & data,NerscField &header)
|
|
||||||
{
|
|
||||||
header.data_type = NerscDataType<vobj>::DataType;
|
|
||||||
header.floating_point = NerscDataType<vobj>::FloatingPoint;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> inline void NerscPhysicalCharacteristics(LatticeGaugeField & data,NerscField &header)
|
|
||||||
{
|
|
||||||
NerscDataType<decltype(data._odata[0])>::DataType(header.data_type);
|
|
||||||
NerscDataType<decltype(data._odata[0])>::FloatingPoint(header.floating_point);
|
|
||||||
header.link_trace=1.0;
|
|
||||||
header.plaquette =1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class vobj> inline void NerscStatisics(Lattice<vobj> & data,NerscField &header)
|
|
||||||
{
|
|
||||||
assert(data._grid->_ndimension==4);
|
|
||||||
|
|
||||||
for(int d=0;d<4;d++)
|
|
||||||
header.dimension[d] = data._grid->_fdimensions[d];
|
|
||||||
|
|
||||||
// compute checksum and any physical properties contained for this type
|
|
||||||
// header.checksum = NerscChecksum(data);
|
|
||||||
|
|
||||||
NerscPhysicalCharacteristics(data,header);
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Now the meat: the object readers
|
// Now the meat: the object readers
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj,class sobj,class fobj,class munger>
|
|
||||||
inline void readNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,std::string &format)
|
template<class vsimd>
|
||||||
|
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
|
||||||
{
|
{
|
||||||
|
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||||
|
|
||||||
GridBase *grid = Umu._grid;
|
GridBase *grid = Umu._grid;
|
||||||
|
int offset = readHeader(file,Umu._grid,header);
|
||||||
|
|
||||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
NerscField clone(header);
|
||||||
int ieee32 = (format == std::string("IEEE32"));
|
|
||||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
|
||||||
int ieee64 = (format == std::string("IEEE64"));
|
|
||||||
|
|
||||||
// Find the location of each site and send to primary node
|
|
||||||
// for(int site=0; site < Layout::vol(); ++site){
|
|
||||||
// multi1d<int> coord = crtesn(site, Layout::lattSize());
|
|
||||||
// for(int dd=0; dd<Nd; dd++){ /* dir */
|
|
||||||
// cfg_in.readArray(su3_buffer, float_size, mat_size);
|
|
||||||
//
|
|
||||||
// Above from Chroma; defines loop order now that NERSC doc no longer
|
|
||||||
// available (how short sighted is that?)
|
|
||||||
{
|
|
||||||
std::ifstream fin(file,std::ios::binary|std::ios::in);
|
|
||||||
fin.seekg(offset);
|
|
||||||
|
|
||||||
Umu = zero;
|
|
||||||
uint32_t csum=0;
|
|
||||||
fobj file_object;
|
|
||||||
sobj munged;
|
|
||||||
|
|
||||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
|
||||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
|
||||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
|
||||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
|
||||||
|
|
||||||
std::vector<int> site({x,y,z,t});
|
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
|
||||||
fin.read((char *)&file_object,sizeof(file_object));
|
|
||||||
|
|
||||||
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
|
||||||
|
|
||||||
munge(file_object,munged,csum);
|
|
||||||
}
|
|
||||||
// The boss who read the file has their value poked
|
|
||||||
pokeSite(munged,Umu,site);
|
|
||||||
}}}}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class vobj,class sobj,class fobj,class munger>
|
|
||||||
inline void writeNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
|
|
||||||
int sequence,double lt,double pl)
|
|
||||||
{
|
|
||||||
GridBase *grid = Umu._grid;
|
|
||||||
NerscField header;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
// First write the header; this is in wrong place
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
assert(grid->_ndimension == 4);
|
|
||||||
for(int d=0;d<4;d++){
|
|
||||||
header.dimension[d]=grid->_fdimensions[d];
|
|
||||||
header.boundary [d]=std::string("PERIODIC");;
|
|
||||||
}
|
|
||||||
header.hdr_version=std::string("WHATDAHECK");
|
|
||||||
// header.storage_format=storage_format<vobj>::string; // use template specialisation
|
|
||||||
// header.data_type=data_type<vobj>::string;
|
|
||||||
header.storage_format=std::string("debug");
|
|
||||||
header.data_type =std::string("debug");
|
|
||||||
|
|
||||||
//FIXME; use template specialisation to fill these out
|
|
||||||
header.link_trace =lt;
|
|
||||||
header.plaquette =pl;
|
|
||||||
header.checksum =0;
|
|
||||||
|
|
||||||
//
|
|
||||||
header.sequence_number =sequence;
|
|
||||||
header.ensemble_id =std::string("UKQCD");
|
|
||||||
header.ensemble_label =std::string("UKQCD");
|
|
||||||
header.creator =std::string("Tadahito");
|
|
||||||
header.creator_hardware=std::string("BlueGene/Q");
|
|
||||||
header.creation_date =std::string("AnnoDomini");
|
|
||||||
header.archive_date =std::string("AnnoDomini");
|
|
||||||
header.floating_point =std::string("IEEE64BIG");
|
|
||||||
// header.data_start=;
|
|
||||||
// unsigned int checksum;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
// Now write the body
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
{
|
|
||||||
std::ofstream fout(file,std::ios::binary|std::ios::out);
|
|
||||||
fout.seekp(offset);
|
|
||||||
|
|
||||||
Umu = zero;
|
|
||||||
uint32_t csum=0;
|
|
||||||
fobj file_object;
|
|
||||||
sobj unmunged;
|
|
||||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
|
||||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
|
||||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
|
||||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
|
||||||
std::vector<int> site({x,y,z,t});
|
|
||||||
peekSite(unmunged,Umu,site);
|
|
||||||
munge(unmunged,file_object,csum);
|
|
||||||
// broadcast & insert
|
|
||||||
fout.write((char *)&file_object,sizeof(file_object));
|
|
||||||
}}}}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
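readNerscHeader / readHeader return the byte offset at which the binary payload begins. A NERSC-style configuration opens with a plain-ASCII block of KEY = VALUE lines; the sketch below assumes BEGIN_HEADER / END_HEADER sentinels and makes no claim to match Grid's actual parser, it only illustrates how such an offset can be recovered:

#include <fstream>
#include <map>
#include <string>

// Sketch only: read "KEY = VALUE" lines between assumed BEGIN_HEADER /
// END_HEADER sentinels and return the byte offset of the binary data.
inline int readAsciiHeader(const std::string &file,
                           std::map<std::string, std::string> &kv) {
  std::ifstream fin(file);
  std::string line;
  std::getline(fin, line);                       // expect "BEGIN_HEADER"
  while (std::getline(fin, line)) {
    if (line.find("END_HEADER") != std::string::npos) break;
    std::size_t eq = line.find('=');
    if (eq != std::string::npos) kv[line.substr(0, eq)] = line.substr(eq + 1);
  }
  return (int)fin.tellg();                       // binary payload starts here
}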
inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std::string file)
|
|
||||||
{
|
|
||||||
GridBase *grid = Umu._grid;
|
|
||||||
|
|
||||||
int offset = readNerscHeader(file,Umu._grid,header);
|
|
||||||
|
|
||||||
std::string format(header.floating_point);
|
std::string format(header.floating_point);
|
||||||
|
|
||||||
@ -516,48 +311,106 @@ inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std
|
|||||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||||
int ieee64 = (format == std::string("IEEE64"));
|
int ieee64 = (format == std::string("IEEE64"));
|
||||||
|
|
||||||
|
uint32_t csum;
|
||||||
// depending on datatype, set up munger;
|
// depending on datatype, set up munger;
|
||||||
// munger is a function of <floating point, Real, data_type>
|
// munger is a function of <floating point, Real, data_type>
|
||||||
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
||||||
if ( ieee32 || ieee32big ) {
|
if ( ieee32 || ieee32big ) {
|
||||||
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3F>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||||
(Umu,file,
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||||
Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(),
|
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
|
||||||
offset,format);
|
|
||||||
}
|
}
|
||||||
if ( ieee64 || ieee64big ) {
|
if ( ieee64 || ieee64big ) {
|
||||||
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3D>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||||
(Umu,file,
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||||
Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),
|
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
|
||||||
offset,format);
|
|
||||||
}
|
}
|
||||||
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
|
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
|
||||||
if ( ieee32 || ieee32big ) {
|
if ( ieee32 || ieee32big ) {
|
||||||
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixF>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||||
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||||
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
|
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
|
||||||
}
|
}
|
||||||
if ( ieee64 || ieee64big ) {
|
if ( ieee64 || ieee64big ) {
|
||||||
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixD>
|
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||||
|
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||||
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
|
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NerscStatistics<GaugeField>(Umu,clone);
|
||||||
|
|
||||||
|
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||||
|
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
|
||||||
|
assert(csum == header.checksum );
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"Read NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
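The 4D_SU3_GAUGE branch above stores only the first two rows of each link (LorentzColour2x3), so the 3x2 munger has to rebuild the third row on read. For a unitary matrix with unit determinant the missing row is the complex conjugate of the cross product of the first two; a self-contained sketch in plain std::complex rather than Grid tensor types:

#include <complex>

// Sketch: rebuild row 2 of an SU(3) matrix from rows 0 and 1,
// row2 = conj(row0 x row1).
inline void reconstructThirdRow(std::complex<double> U[3][3]) {
  for (int i = 0; i < 3; i++) {
    int j = (i + 1) % 3, k = (i + 2) % 3;
    U[2][i] = std::conj(U[0][j] * U[1][k] - U[0][k] * U[1][j]);
  }
}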
template<class vobj>
|
template<class vsimd>
|
||||||
inline void writeNerscConfiguration(Lattice<vobj> &Umu,NerscField &header,std::string file)
|
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
|
||||||
{
|
{
|
||||||
GridBase &grid = Umu._grid;
|
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||||
|
|
||||||
|
typedef iLorentzColourMatrix<vsimd> vobj;
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
|
// Following should become arguments
|
||||||
|
NerscField header;
|
||||||
|
header.sequence_number = 1;
|
||||||
|
header.ensemble_id = "UKQCD";
|
||||||
|
header.ensemble_label = "DWF";
|
||||||
|
|
||||||
|
typedef LorentzColourMatrixD fobj3D;
|
||||||
|
typedef LorentzColour2x3D fobj2D;
|
||||||
|
typedef LorentzColourMatrixF fobj3f;
|
||||||
|
typedef LorentzColour2x3F fobj2f;
|
||||||
|
|
||||||
|
GridBase *grid = Umu._grid;
|
||||||
|
|
||||||
|
NerscGrid(grid,header);
|
||||||
|
NerscStatistics<GaugeField>(Umu,header);
|
||||||
|
NerscMachineCharacteristics(header);
|
||||||
|
|
||||||
|
uint32_t csum;
|
||||||
|
int offset;
|
||||||
|
|
||||||
NerscStatisics(Umu,header);
|
if ( two_row ) {
|
||||||
|
|
||||||
int offset = writeNerscHeader(header,file);
|
header.floating_point = std::string("IEEE64BIG");
|
||||||
|
header.data_type = std::string("4D_SU3_GAUGE");
|
||||||
|
Nersc3x2unmunger<fobj2D,sobj> munge;
|
||||||
|
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
|
||||||
|
offset = writeHeader(header,file);
|
||||||
|
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
|
||||||
|
|
||||||
writeNerscObject(Umu,NerscSimpleMunger<vobj,vobj>(),offset);
|
std::string file1 = file+"para";
|
||||||
}
|
int offset1 = writeHeader(header,file1);
|
||||||
|
int csum1=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file1,munge,offset,header.floating_point);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl;
|
||||||
|
std::cout << GridLogMessage <<std::hex<< " TESTING PARALLEL WRITE csums " << csum1 << " "<< csum << std::endl;
|
||||||
|
std::cout << std::dec;
|
||||||
|
|
||||||
}
|
assert(offset1==offset);
|
||||||
|
assert(csum1==csum);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
header.floating_point = std::string("IEEE64BIG");
|
||||||
|
header.data_type = std::string("4D_SU3_GAUGE_3X3");
|
||||||
|
NerscSimpleUnmunger<fobj3D,sobj> munge;
|
||||||
|
BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
|
||||||
|
offset = writeHeader(header,file);
|
||||||
|
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}}
|
||||||
#endif
|
#endif
|
||||||
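Both the serial and parallel writers accumulate a uint32_t csum that is stored in the header and checked on read. A plausible accumulation rule is a wrap-around 32-bit word sum over the file-format objects; the sketch below is an assumption about the flavour of checksum, not a statement of Grid's exact rule:

#include <cstdint>
#include <cstddef>
#include <cstring>

// Sketch: wrap-around 32-bit additive checksum over a buffer.
inline void accumulateChecksum(const void *data, std::size_t bytes,
                               std::uint32_t &csum) {
  const unsigned char *p = static_cast<const unsigned char *>(data);
  for (std::size_t i = 0; i + 4 <= bytes; i += 4) {
    std::uint32_t word;
    std::memcpy(&word, p + i, 4);   // memcpy avoids alignment trouble
    csum += word;                   // unsigned overflow wraps, as intended
  }
}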
|
@ -14,7 +14,7 @@
|
|||||||
#ifndef SOURCE_PUGIXML_CPP
|
#ifndef SOURCE_PUGIXML_CPP
|
||||||
#define SOURCE_PUGIXML_CPP
|
#define SOURCE_PUGIXML_CPP
|
||||||
|
|
||||||
#include "pugixml.hpp"
|
#include <pugixml/pugixml.h>
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
@ -17,7 +17,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Include user configuration file (this can define various configuration macros)
|
// Include user configuration file (this can define various configuration macros)
|
||||||
#include "pugiconfig.hpp"
|
#include <pugixml/pugiconfig.hpp>
|
||||||
|
|
||||||
#ifndef HEADER_PUGIXML_HPP
|
#ifndef HEADER_PUGIXML_HPP
|
||||||
#define HEADER_PUGIXML_HPP
|
#define HEADER_PUGIXML_HPP
|
@ -62,7 +62,6 @@ namespace QCD {
|
|||||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||||
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||||
|
|
||||||
|
|
||||||
// Spin matrix
|
// Spin matrix
|
||||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||||
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
||||||
@ -154,7 +153,7 @@ namespace QCD {
|
|||||||
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
||||||
|
|
||||||
// singlets
|
// singlets
|
||||||
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
||||||
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
||||||
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
||||||
|
|
||||||
@ -162,7 +161,7 @@ namespace QCD {
|
|||||||
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
||||||
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
||||||
|
|
||||||
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
||||||
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
||||||
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
||||||
|
|
||||||
@ -251,6 +250,8 @@ namespace QCD {
|
|||||||
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
||||||
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
||||||
|
|
||||||
|
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
|
||||||
|
|
||||||
// Uhgg... typing this hurt ;)
|
// Uhgg... typing this hurt ;)
|
||||||
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
||||||
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
||||||
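The new LorentzScalar alias gives the single-link field type for any gauge field, which is what WilsonGaugeAction uses later in this commit as its GaugeLinkField. A short usage sketch, assuming the usual Grid peek/poke by Lorentz index and that Grid.h is included:

// Sketch: code templated on a GaugeField can name the matching link type
// without spelling out the tensor structure.
template<class GaugeField>
void scaleOneLink(GaugeField &U, int mu, RealD s) {
  typedef LorentzScalar<GaugeField> GaugeLinkField;  // link type for this GaugeField
  GaugeLinkField Umu(U._grid);
  Umu = PeekIndex<LorentzIndex>(U, mu);   // extract direction mu
  Umu = Umu * s;                          // operate on the single link field
  PokeIndex<LorentzIndex>(U, Umu, mu);    // write it back
}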
|
@ -7,16 +7,15 @@ template<class GaugeField>
|
|||||||
class Action {
|
class Action {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual void init (const GaugeField &U, GridParallelRNG& pRNG) = 0; //
|
// Boundary conditions? // Heatbath?
|
||||||
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions
|
||||||
virtual RealD S (const GaugeField &U) = 0; // evaluate the action
|
virtual RealD S (const GaugeField &U) = 0; // evaluate the action
|
||||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
|
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
|
||||||
virtual void refresh(const GaugeField & ) {}; // Default to no-op for actions with no internal fields
|
|
||||||
// Boundary conditions?
|
|
||||||
// Heatbath?
|
|
||||||
virtual ~Action() {};
|
virtual ~Action() {};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
|
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
|
||||||
|
/*
|
||||||
template<class GaugeField, class FermionField>
|
template<class GaugeField, class FermionField>
|
||||||
class PseudoFermionAction : public Action<GaugeField> {
|
class PseudoFermionAction : public Action<GaugeField> {
|
||||||
public:
|
public:
|
||||||
@ -32,5 +31,28 @@ class PseudoFermionAction : public Action<GaugeField> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
template<class GaugeField> struct ActionLevel{
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
|
||||||
|
|
||||||
|
int multiplier;
|
||||||
|
|
||||||
|
std::vector<ActPtr> actions;
|
||||||
|
|
||||||
|
ActionLevel(int mul = 1) : multiplier(mul) {
|
||||||
|
assert (mul > 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
void push_back(ActPtr ptr){
|
||||||
|
actions.push_back(ptr);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >;
|
||||||
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
#endif
|
#endif
|
||||||
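ActionLevel carries a multiplier (the relative number of integration steps) and a list of Action pointers, and ActionSet is simply a vector of levels. A sketch of how a two-timescale set could be assembled, using the WilsonGaugeActionR typedef introduced later in this commit; the pseudofermion action is a hypothetical stand-in and is left commented out:

// Sketch: two-level ActionSet, gauge action on the finer level.
WilsonGaugeActionR Waction(5.6);              // beta = 5.6
ActionLevel<LatticeGaugeField> Level1(4);     // 4 sub-steps per outer step
Level1.push_back(&Waction);

ActionLevel<LatticeGaugeField> Level2(1);     // coarse (outer) level
// Level2.push_back(&SomePseudoFermionAction); // hypothetical

ActionSet<LatticeGaugeField> FullSet;
FullSet.push_back(Level1);
FullSet.push_back(Level2);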
|
@ -16,10 +16,6 @@
|
|||||||
#include <qcd/action/ActionBase.h>
|
#include <qcd/action/ActionBase.h>
|
||||||
#include <qcd/action/ActionParams.h>
|
#include <qcd/action/ActionParams.h>
|
||||||
|
|
||||||
////////////////////////////////////////////
|
|
||||||
// Gauge Actions
|
|
||||||
////////////////////////////////////////////
|
|
||||||
#include <qcd/action/gauge/WilsonGaugeAction.h>
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
@ -31,6 +27,17 @@
|
|||||||
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////
|
||||||
|
// Gauge Actions
|
||||||
|
////////////////////////////////////////////
|
||||||
|
#include <qcd/action/gauge/WilsonGaugeAction.h>
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
typedef WilsonGaugeAction<LatticeGaugeField> WilsonGaugeActionR;
|
||||||
|
typedef WilsonGaugeAction<LatticeGaugeFieldF> WilsonGaugeActionF;
|
||||||
|
typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
|
||||||
|
}}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Explicit template instantiation is still required in the .cc files
|
// Explicit template instantiation is still required in the .cc files
|
||||||
//
|
//
|
||||||
@ -47,10 +54,10 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define FermOpTemplateInstantiate(A) \
|
#define FermOpTemplateInstantiate(A) \
|
||||||
template class A<GparityWilsonImplF>; \
|
|
||||||
template class A<GparityWilsonImplD>; \
|
|
||||||
template class A<WilsonImplF>; \
|
template class A<WilsonImplF>; \
|
||||||
template class A<WilsonImplD>;
|
template class A<WilsonImplD>;
|
||||||
|
// template class A<GparityWilsonImplF>; \
|
||||||
|
// template class A<GparityWilsonImplD>;
|
||||||
|
|
||||||
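FermOpTemplateInstantiate expands to the explicit instantiations a .cc translation unit needs; with the Gparity lines commented out above, only the Wilson implementations are stamped out. Usage, as it appears for the kernels later in this commit:

// In e.g. a WilsonFermion .cc file, after the member definitions:
FermOpTemplateInstantiate(WilsonFermion);
// which, with the macro as edited above, expands to
//   template class WilsonFermion<WilsonImplF>;
//   template class WilsonFermion<WilsonImplD>;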
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Fermion operators / actions
|
// Fermion operators / actions
|
||||||
@ -72,8 +79,8 @@
|
|||||||
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonContFracTanhFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonContFracZolotarevFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||||
|
@ -82,7 +82,8 @@ namespace QCD {
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.checkerboard = in.checkerboard;
|
||||||
out = (4.0+mass)*in;
|
typename FermionField::scalar_type scal(4.0+mass);
|
||||||
|
out = scal*in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
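The only change to Mooee is that the (4 + mass) coefficient is built as a scalar_type before multiplying, so the expression template is not asked to combine a bare double with a lattice. The diagonal piece it implements is the usual Wilson term (lattice spacing set to one):

M_{ee}\,\psi(x) \;=\; (4 + m)\,\psi(x) .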
|
@ -28,18 +28,31 @@ namespace Grid {
|
|||||||
void DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
|
void DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
||||||
|
#define HANDOPT
|
||||||
|
#ifdef HANDOPT
|
||||||
|
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out);
|
||||||
|
#else
|
||||||
|
|
||||||
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out){
|
int sF,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||||
}
|
}
|
||||||
|
|
||||||
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out){
|
int sF,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
WilsonKernels(const ImplParams &p= ImplParams()) : Base(p) {};
|
WilsonKernels(const ImplParams &p= ImplParams()) : Base(p) {};
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
#define REGISTER
|
#define REGISTER
|
||||||
|
|
||||||
#define LOAD_CHIMU \
|
#define LOAD_CHIMU \
|
||||||
const vSpinColourVector & ref (in._odata[offset]); \
|
const SiteSpinor & ref (in._odata[offset]); \
|
||||||
Chimu_00=ref()(0)(0);\
|
Chimu_00=ref()(0)(0);\
|
||||||
Chimu_01=ref()(0)(1);\
|
Chimu_01=ref()(0)(1);\
|
||||||
Chimu_02=ref()(0)(2);\
|
Chimu_02=ref()(0)(2);\
|
||||||
@ -18,7 +18,7 @@
|
|||||||
Chimu_32=ref()(3)(2);
|
Chimu_32=ref()(3)(2);
|
||||||
|
|
||||||
#define LOAD_CHI\
|
#define LOAD_CHI\
|
||||||
const vHalfSpinColourVector &ref(buf[offset]); \
|
const SiteHalfSpinor &ref(buf[offset]); \
|
||||||
Chi_00 = ref()(0)(0);\
|
Chi_00 = ref()(0)(0);\
|
||||||
Chi_01 = ref()(0)(1);\
|
Chi_01 = ref()(0)(1);\
|
||||||
Chi_02 = ref()(0)(2);\
|
Chi_02 = ref()(0)(2);\
|
||||||
@ -56,13 +56,13 @@
|
|||||||
UChi_02+= U_20*Chi_02;\
|
UChi_02+= U_20*Chi_02;\
|
||||||
UChi_12+= U_20*Chi_12;
|
UChi_12+= U_20*Chi_12;
|
||||||
|
|
||||||
#define PERMUTE\
|
#define PERMUTE_DIR(dir) \
|
||||||
permute(Chi_00,Chi_00,ptype);\
|
permute##dir(Chi_00,Chi_00);\
|
||||||
permute(Chi_01,Chi_01,ptype);\
|
permute##dir(Chi_01,Chi_01);\
|
||||||
permute(Chi_02,Chi_02,ptype);\
|
permute##dir(Chi_02,Chi_02);\
|
||||||
permute(Chi_10,Chi_10,ptype);\
|
permute##dir(Chi_10,Chi_10);\
|
||||||
permute(Chi_11,Chi_11,ptype);\
|
permute##dir(Chi_11,Chi_11);\
|
||||||
permute(Chi_12,Chi_12,ptype);
|
permute##dir(Chi_12,Chi_12);
|
||||||
|
|
||||||
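PERMUTE_DIR takes the permute distance as an argument and token-pastes it onto the call, so the direction-specific permute variant is selected at compile time instead of dispatching on a run-time ptype. For example:

// PERMUTE_DIR(3) expands, after ## pasting, to
//   permute3(Chi_00,Chi_00); permute3(Chi_01,Chi_01); permute3(Chi_02,Chi_02);
//   permute3(Chi_10,Chi_10); permute3(Chi_11,Chi_11); permute3(Chi_12,Chi_12);
// i.e. the permute distance is fixed per call site.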
// hspin(0)=fspin(0)+timesI(fspin(3));
|
// hspin(0)=fspin(0)+timesI(fspin(3));
|
||||||
// hspin(1)=fspin(1)+timesI(fspin(2));
|
// hspin(1)=fspin(1)+timesI(fspin(2));
|
||||||
@ -280,12 +280,16 @@
|
|||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
#if 0
|
#ifdef HANDOPT
|
||||||
template<class Simd>
|
template<class Impl>
|
||||||
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
void WilsonKernels<Impl >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
|
// std::cout << "Hand op Dhop "<<std::endl;
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
REGISTER Simd result_00; // 12 regs on knc
|
REGISTER Simd result_00; // 12 regs on knc
|
||||||
REGISTER Simd result_01;
|
REGISTER Simd result_01;
|
||||||
REGISTER Simd result_02;
|
REGISTER Simd result_02;
|
||||||
@ -339,20 +343,20 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
#define Chimu_32 UChi_12
|
#define Chimu_32 UChi_12
|
||||||
|
|
||||||
|
|
||||||
|
StencilEntry *SE;
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
int ss=sF;
|
|
||||||
|
|
||||||
// Xp
|
// Xp
|
||||||
offset = st._offsets [Xp][ss];
|
SE=st.GetEntry(ptype,Xp,ss);
|
||||||
local = st._is_local[Xp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
XP_PROJ;
|
XP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -364,16 +368,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
XP_RECON;
|
XP_RECON;
|
||||||
|
|
||||||
// Yp
|
// Yp
|
||||||
offset = st._offsets [Yp][ss];
|
SE=st.GetEntry(ptype,Yp,ss);
|
||||||
local = st._is_local[Yp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Yp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Yp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
YP_PROJ;
|
YP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -385,16 +389,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
|
|
||||||
|
|
||||||
// Zp
|
// Zp
|
||||||
offset = st._offsets [Zp][ss];
|
SE=st.GetEntry(ptype,Zp,ss);
|
||||||
local = st._is_local[Zp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
ZP_PROJ;
|
ZP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -405,16 +409,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
ZP_RECON_ACCUM;
|
ZP_RECON_ACCUM;
|
||||||
|
|
||||||
// Tp
|
// Tp
|
||||||
offset = st._offsets [Tp][ss];
|
SE=st.GetEntry(ptype,Tp,ss);
|
||||||
local = st._is_local[Tp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
TP_PROJ;
|
TP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -425,16 +429,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
TP_RECON_ACCUM;
|
TP_RECON_ACCUM;
|
||||||
|
|
||||||
// Xm
|
// Xm
|
||||||
offset = st._offsets [Xm][ss];
|
SE=st.GetEntry(ptype,Xm,ss);
|
||||||
local = st._is_local[Xm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
XM_PROJ;
|
XM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -445,16 +449,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
XM_RECON_ACCUM;
|
XM_RECON_ACCUM;
|
||||||
|
|
||||||
// Ym
|
// Ym
|
||||||
offset = st._offsets [Ym][ss];
|
SE=st.GetEntry(ptype,Ym,ss);
|
||||||
local = st._is_local[Ym][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Ym][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Ym];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
YM_PROJ;
|
YM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -465,16 +469,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
YM_RECON_ACCUM;
|
YM_RECON_ACCUM;
|
||||||
|
|
||||||
// Zm
|
// Zm
|
||||||
offset = st._offsets [Zm][ss];
|
SE=st.GetEntry(ptype,Zm,ss);
|
||||||
local = st._is_local[Zm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
ZM_PROJ;
|
ZM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -485,16 +489,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
ZM_RECON_ACCUM;
|
ZM_RECON_ACCUM;
|
||||||
|
|
||||||
// Tm
|
// Tm
|
||||||
offset = st._offsets [Tm][ss];
|
SE=st.GetEntry(ptype,Tm,ss);
|
||||||
local = st._is_local[Tm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
TM_PROJ;
|
TM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -505,7 +509,7 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
TM_RECON_ACCUM;
|
TM_RECON_ACCUM;
|
||||||
|
|
||||||
{
|
{
|
||||||
vSpinColourVector & ref (out._odata[ss]);
|
SiteSpinor & ref (out._odata[ss]);
|
||||||
vstream(ref()(0)(0),result_00*(-0.5));
|
vstream(ref()(0)(0),result_00*(-0.5));
|
||||||
vstream(ref()(0)(1),result_01*(-0.5));
|
vstream(ref()(0)(1),result_01*(-0.5));
|
||||||
vstream(ref()(0)(2),result_02*(-0.5));
|
vstream(ref()(0)(2),result_02*(-0.5));
|
||||||
@ -521,11 +525,14 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Simd>
|
template<class Impl>
|
||||||
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
REGISTER Simd result_00; // 12 regs on knc
|
REGISTER Simd result_00; // 12 regs on knc
|
||||||
REGISTER Simd result_01;
|
REGISTER Simd result_01;
|
||||||
REGISTER Simd result_02;
|
REGISTER Simd result_02;
|
||||||
@ -580,18 +587,19 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
|
|
||||||
|
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
|
StencilEntry *SE;
|
||||||
|
|
||||||
// Xp
|
// Xp
|
||||||
offset = st._offsets [Xp][ss];
|
SE=st.GetEntry(ptype,Xp,ss);
|
||||||
local = st._is_local[Xp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
XM_PROJ;
|
XM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -602,16 +610,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
XM_RECON;
|
XM_RECON;
|
||||||
|
|
||||||
// Yp
|
// Yp
|
||||||
offset = st._offsets [Yp][ss];
|
SE=st.GetEntry(ptype,Yp,ss);
|
||||||
local = st._is_local[Yp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Yp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Yp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
YM_PROJ;
|
YM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -623,16 +631,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
|
|
||||||
|
|
||||||
// Zp
|
// Zp
|
||||||
offset = st._offsets [Zp][ss];
|
SE=st.GetEntry(ptype,Zp,ss);
|
||||||
local = st._is_local[Zp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
ZM_PROJ;
|
ZM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -643,16 +651,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
ZM_RECON_ACCUM;
|
ZM_RECON_ACCUM;
|
||||||
|
|
||||||
// Tp
|
// Tp
|
||||||
offset = st._offsets [Tp][ss];
|
SE=st.GetEntry(ptype,Tp,ss);
|
||||||
local = st._is_local[Tp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
TM_PROJ;
|
TM_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -663,16 +671,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
TM_RECON_ACCUM;
|
TM_RECON_ACCUM;
|
||||||
|
|
||||||
// Xm
|
// Xm
|
||||||
offset = st._offsets [Xm][ss];
|
SE=st.GetEntry(ptype,Xm,ss);
|
||||||
local = st._is_local[Xm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
XP_PROJ;
|
XP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -684,16 +692,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
|
|
||||||
|
|
||||||
// Ym
|
// Ym
|
||||||
offset = st._offsets [Ym][ss];
|
SE=st.GetEntry(ptype,Ym,ss);
|
||||||
local = st._is_local[Ym][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Ym][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Ym];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
YP_PROJ;
|
YP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -704,16 +712,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
YP_RECON_ACCUM;
|
YP_RECON_ACCUM;
|
||||||
|
|
||||||
// Zm
|
// Zm
|
||||||
offset = st._offsets [Zm][ss];
|
SE=st.GetEntry(ptype,Zm,ss);
|
||||||
local = st._is_local[Zm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
ZP_PROJ;
|
ZP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -724,16 +732,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
ZP_RECON_ACCUM;
|
ZP_RECON_ACCUM;
|
||||||
|
|
||||||
// Tm
|
// Tm
|
||||||
offset = st._offsets [Tm][ss];
|
SE=st.GetEntry(ptype,Tm,ss);
|
||||||
local = st._is_local[Tm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
TP_PROJ;
|
TP_PROJ;
|
||||||
if ( perm) {
|
if ( perm) {
|
||||||
PERMUTE;
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI;
|
LOAD_CHI;
|
||||||
@ -759,5 +767,6 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
vstream(ref()(3)(2),result_32*(-0.5));
|
vstream(ref()(3)(2),result_32*(-0.5));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
FermOpTemplateInstantiate(WilsonKernels);
|
||||||
#endif
|
#endif
|
||||||
}}
|
}}
|
||||||
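Each direction block above now goes through a StencilEntry rather than four parallel arrays: one GetEntry lookup per (direction, site) yields the offset, locality flag and permute flag together. A self-contained toy sketch of that access pattern; the types are stand-ins, and the projector / gauge-multiply / reconstruct steps are only indicated in comments:

#include <vector>

// Toy stand-ins to show the access pattern only; the real types live in Grid.
struct StencilEntry { int _offset, _is_local, _permute; };
struct ToyStencil {
  std::vector<std::vector<StencilEntry> > entries;   // [direction][site]
  StencilEntry *GetEntry(int &ptype, int dir, int site) {
    ptype = dir % 4;                                  // toy rule for the permute type
    return &entries[dir][site];
  }
};

void visitSite(ToyStencil &st, int ss) {
  int ptype;
  for (int dir = 0; dir < 8; dir++) {
    StencilEntry *SE = st.GetEntry(ptype, dir, ss);   // one lookup per (dir,site)
    if (SE->_is_local) {
      // neighbour on this node: load the full spinor at SE->_offset and
      // spin-project for this direction (LOAD_CHIMU; *_PROJ)
      if (SE->_permute) {
        // neighbour wraps a simd lane: apply the compile-time permute
        // (PERMUTE_DIR(...), distance given by ptype)
      }
    } else {
      // neighbour arrived pre-projected in the comms buffer (LOAD_CHI)
    }
    // multiply by the gauge link and accumulate (MULT_2SPIN; *_RECON_ACCUM)
  }
}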
|
@ -7,35 +7,42 @@ namespace Grid{
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Wilson Gauge Action .. should I template the Nc etc..
|
// Wilson Gauge Action .. should I template the Nc etc..
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
template<class GaugeField, class MatrixField>
|
template<class GaugeField>
|
||||||
class WilsonGaugeAction : public Action<GaugeField> {
|
class WilsonGaugeAction : public Action<GaugeField> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef LorentzScalar<GaugeField> GaugeLinkField;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
RealD beta;
|
RealD beta;
|
||||||
public:
|
public:
|
||||||
WilsonGaugeAction(RealD b):beta(b){};
|
WilsonGaugeAction(RealD b):beta(b){};
|
||||||
|
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {};
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
|
||||||
|
|
||||||
virtual RealD S(const GaugeField &U) {
|
virtual RealD S(const GaugeField &U) {
|
||||||
RealD plaq = WilsonLoops<MatrixField,GaugeField>::avgPlaquette(U);
|
RealD plaq = WilsonLoops<GaugeField>::avgPlaquette(U);
|
||||||
std::cout<<GridLogMessage << "Plaq : "<<plaq << "\n";
|
std::cout<<GridLogMessage << "Plaq : "<<plaq << "\n";
|
||||||
RealD vol = U._grid->gSites();
|
RealD vol = U._grid->gSites();
|
||||||
RealD action=beta*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5;
|
RealD action=beta*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5;
|
||||||
std::cout << GridLogMessage << "WilsonGauge action "<<action<<std::endl;
|
std::cout << GridLogMessage << "WilsonGauge action "<<action<<std::endl;
|
||||||
return action;
|
return action;
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
|
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
|
||||||
//not optimal implementation FIXME
|
//not optimal implementation FIXME
|
||||||
//extend Ta to include Lorentz indexes
|
//extend Ta to include Lorentz indexes
|
||||||
RealD factor = 0.5*beta/RealD(Nc);
|
RealD factor = 0.5*beta/RealD(Nc);
|
||||||
MatrixField dSdU_mu(U._grid);
|
GaugeLinkField Umu(U._grid);
|
||||||
MatrixField Umu(U._grid);
|
GaugeLinkField dSdU_mu(U._grid);
|
||||||
for (int mu=0; mu < Nd; mu++){
|
for (int mu=0; mu < Nd; mu++){
|
||||||
|
|
||||||
Umu = PeekIndex<LorentzIndex>(U,mu);
|
Umu = PeekIndex<LorentzIndex>(U,mu);
|
||||||
|
|
||||||
// Staple in direction mu
|
// Staple in direction mu
|
||||||
WilsonLoops<MatrixField,GaugeField>::Staple(dSdU_mu,U,mu);
|
WilsonLoops<GaugeField>::Staple(dSdU_mu,U,mu);
|
||||||
dSdU_mu = Ta(Umu*adj(dSdU_mu))*factor;
|
dSdU_mu = Ta(Umu*adj(dSdU_mu))*factor;
|
||||||
pokeLorentz(dSdU, dSdU_mu, mu);
|
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
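S(U) above turns the average plaquette into the standard Wilson gauge action. With plaq the plaquette trace averaged over sites and over the Nd(Nd-1)/2 orientations, normalised to one on a unit configuration, and V = gSites(),

S_g[U] \;=\; \beta \sum_{x}\sum_{\mu<\nu}\Big(1 - \tfrac{1}{N_c}\,\mathrm{Re}\,\mathrm{tr}\,U_{\mu\nu}(x)\Big)
       \;=\; \beta\,(1-\mathrm{plaq})\,\tfrac{N_d(N_d-1)}{2}\,V ,

which is exactly the action = beta*(1.0-plaq)*(Nd*(Nd-1.0))*vol*0.5 line.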
|
@ -61,7 +61,7 @@ namespace Grid{
|
|||||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
|
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
|
||||||
// = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}
|
// = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}
|
||||||
|
@ -61,7 +61,7 @@ namespace Grid{
|
|||||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
|
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
|
||||||
//
|
//
|
||||||
|
@ -57,7 +57,7 @@ namespace Grid{
|
|||||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// P(phi) = e^{- phi^dag (MdagM)^-1/2 phi}
|
// P(phi) = e^{- phi^dag (MdagM)^-1/2 phi}
|
||||||
// = e^{- phi^dag (MdagM)^-1/4 (MdagM)^-1/4 phi}
|
// = e^{- phi^dag (MdagM)^-1/4 (MdagM)^-1/4 phi}
|
||||||
|
@ -55,7 +55,7 @@ namespace Grid{
|
|||||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
|
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
|
||||||
//
|
//
|
||||||
|
@ -35,7 +35,7 @@ namespace Grid{
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
|
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// P(phi) = e^{- phi^dag (MdagM)^-1 phi}
|
// P(phi) = e^{- phi^dag (MdagM)^-1 phi}
|
||||||
// Phi = Mdag eta
|
// Phi = Mdag eta
|
||||||
|
@ -44,7 +44,7 @@ namespace Grid{
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
|
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1 phi}
|
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1 phi}
|
||||||
// Phi = McpDag eta
|
// Phi = McpDag eta
|
||||||
|
@ -40,7 +40,7 @@ namespace Grid{
|
|||||||
conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
|
conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
|
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
|
||||||
//
|
//
|
||||||
|
@ -28,7 +28,7 @@ namespace Grid{
|
|||||||
OperatorFunction<FermionField> & AS
|
OperatorFunction<FermionField> & AS
|
||||||
) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), Phi(_NumOp.FermionGrid()) {};
|
) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), Phi(_NumOp.FermionGrid()) {};
|
||||||
|
|
||||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||||
|
|
||||||
// P(phi) = e^{- phi^dag V (MdagM)^-1 Vdag phi}
|
// P(phi) = e^{- phi^dag V (MdagM)^-1 Vdag phi}
|
||||||
//
|
//
|
||||||
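All of these refresh methods implement the heatbath trick spelled out in their comments: to sample phi with weight e^{-phi^dag (M^dag M)^{-1} phi}, draw a Gaussian eta and set phi = M^dag eta. The change of variables makes this exact,

\eta \sim e^{-\eta^\dagger \eta}, \quad \phi = M^\dagger \eta
\;\Rightarrow\;
\eta^\dagger \eta = \phi^\dagger (M^\dagger M)^{-1}\phi
\;\Rightarrow\;
P(\phi) \propto e^{-\phi^\dagger (M^\dagger M)^{-1}\phi},

since the Jacobian of the map does not depend on phi and only changes the normalisation.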
|
@ -8,8 +8,8 @@ namespace Grid{
|
|||||||
|
|
||||||
////////////////////////////// Default values
|
////////////////////////////// Default values
|
||||||
Nsweeps = 200;
|
Nsweeps = 200;
|
||||||
TotalSweeps = 220;
|
TotalSweeps = 240;
|
||||||
ThermalizationSteps = 20;
|
ThermalizationSteps = 40;
|
||||||
StartingConfig = 0;
|
StartingConfig = 0;
|
||||||
SaveInterval = 1;
|
SaveInterval = 1;
|
||||||
Filename_prefix = "Conf_";
|
Filename_prefix = "Conf_";
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
namespace Grid{
|
namespace Grid{
|
||||||
namespace QCD{
|
namespace QCD{
|
||||||
|
|
||||||
|
|
||||||
struct HMCparameters{
|
struct HMCparameters{
|
||||||
Integer Nsweeps; /* @brief Number of sweeps in this run */
|
Integer Nsweeps; /* @brief Number of sweeps in this run */
|
||||||
Integer TotalSweeps; /* @brief If provided, the total number of sweeps */
|
Integer TotalSweeps; /* @brief If provided, the total number of sweeps */
|
||||||
@ -26,14 +27,17 @@ namespace Grid{
|
|||||||
HMCparameters();
|
HMCparameters();
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Algorithm>
|
// template <class GaugeField, class Integrator, class Smearer, class Boundary>
|
||||||
class HybridMonteCarlo{
|
template <class GaugeField, class IntegratorType>
|
||||||
|
class HybridMonteCarlo {
|
||||||
|
private:
|
||||||
|
|
||||||
const HMCparameters Params;
|
const HMCparameters Params;
|
||||||
|
|
||||||
|
GridSerialRNG &sRNG; // Fixme: need a RNG management strategy.
|
||||||
|
GridParallelRNG &pRNG; // Fixme: need a RNG management strategy.
|
||||||
|
|
||||||
GridSerialRNG sRNG; // Fixme: need a RNG management strategy.
|
IntegratorType &TheIntegrator;
|
||||||
|
|
||||||
Integrator<Algorithm>& MD;
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// Metropolis step
|
// Metropolis step
|
||||||
@ -63,16 +67,20 @@ namespace Grid{
|
|||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// Evolution
|
// Evolution
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
RealD evolve_step(LatticeGaugeField& U){
|
RealD evolve_step(GaugeField& U){
|
||||||
MD.init(U); // set U and initialize P and phi's
|
|
||||||
|
TheIntegrator.refresh(U,pRNG); // set U and initialize P and phi's
|
||||||
|
|
||||||
|
RealD H0 = TheIntegrator.S(U); // initial state action
|
||||||
|
|
||||||
RealD H0 = MD.S(U); // initial state action
|
|
||||||
std::cout<<GridLogMessage<<"Total H before = "<< H0 << "\n";
|
std::cout<<GridLogMessage<<"Total H before = "<< H0 << "\n";
|
||||||
|
|
||||||
MD.integrate(U);
|
TheIntegrator.integrate(U);
|
||||||
|
|
||||||
RealD H1 = MD.S(U); // updated state action
|
RealD H1 = TheIntegrator.S(U); // updated state action
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"Total H after = "<< H1 << "\n";
|
std::cout<<GridLogMessage<<"Total H after = "<< H1 << "\n";
|
||||||
|
|
||||||
return (H1-H0);
|
return (H1-H0);
|
||||||
}
|
}
|
||||||
|
|
||||||
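evolve_step returns DeltaH = H1 - H0, which the Metropolis step in the section above accepts with probability

P_{\rm acc} \;=\; \min\!\big(1,\; e^{-\Delta H}\big),

so a reversible, area-preserving integrator gives exact sampling even at finite step size.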
@ -81,16 +89,18 @@ namespace Grid{
|
|||||||
/////////////////////////////////////////
|
/////////////////////////////////////////
|
||||||
// Constructor
|
// Constructor
|
||||||
/////////////////////////////////////////
|
/////////////////////////////////////////
|
||||||
HybridMonteCarlo(HMCparameters Pms, Integrator<Algorithm>& MolDyn): Params(Pms),MD(MolDyn) {
|
HybridMonteCarlo(HMCparameters Pms, IntegratorType &_Int, GridSerialRNG &_sRNG, GridParallelRNG &_pRNG ) :
|
||||||
|
Params(Pms),
|
||||||
//FIXME... initialize RNGs also with seed ; RNG management strategy
|
TheIntegrator(_Int),
|
||||||
sRNG.SeedRandomDevice();
|
sRNG(_sRNG),
|
||||||
|
pRNG(_pRNG)
|
||||||
|
{
|
||||||
}
|
}
|
||||||
~HybridMonteCarlo(){};
|
~HybridMonteCarlo(){};
|
||||||
|
|
||||||
|
|
||||||
void evolve(LatticeGaugeField& Uin){
|
void evolve(GaugeField& Uin){
|
||||||
|
|
||||||
Real DeltaH;
|
Real DeltaH;
|
||||||
|
|
||||||
// Thermalizations
|
// Thermalizations
|
||||||
@ -102,9 +112,8 @@ namespace Grid{
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Actual updates (evolve a copy Ucopy then copy back eventually)
|
// Actual updates (evolve a copy Ucopy then copy back eventually)
|
||||||
LatticeGaugeField Ucopy(Uin._grid);
|
GaugeField Ucopy(Uin._grid);
|
||||||
for(int iter=Params.StartingConfig;
|
for(int iter=Params.StartingConfig; iter < Params.Nsweeps+Params.StartingConfig; ++iter){
|
||||||
iter < Params.Nsweeps+Params.StartingConfig; ++iter){
|
|
||||||
std::cout<<GridLogMessage << "-- # Sweep = "<< iter << "\n";
|
std::cout<<GridLogMessage << "-- # Sweep = "<< iter << "\n";
|
||||||
|
|
||||||
Ucopy = Uin;
|
Ucopy = Uin;
|
||||||
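With the RNGs passed in rather than owned, the driver is expected to construct and seed them itself. A sketch of the new call pattern; MyIntegratorType stands in for a concrete Integrator<LatticeGaugeField> subclass, UGrid for the gauge grid, FullSet for an ActionSet, and the parameter values are placeholders:

// Sketch of driving the refactored HMC; see the constructor signatures above.
GridSerialRNG   sRNG;        sRNG.SeedRandomDevice();
GridParallelRNG pRNG(UGrid); pRNG.SeedRandomDevice();

IntegratorParameters MDpar(20);        // 20 MD steps, trajL defaults to 1.0
MyIntegratorType MDynamics(UGrid, MDpar, FullSet);

HMCparameters HMCpar;                  // defaults as set in HMC.cc above
HybridMonteCarlo<LatticeGaugeField, MyIntegratorType> HMC(HMCpar, MDynamics, sRNG, pRNG);
HMC.evolve(U);                         // U: the LatticeGaugeField to update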
|
@ -1,28 +0,0 @@
|
|||||||
/*!
|
|
||||||
@file Integrator_base.cc
|
|
||||||
@brief utilities for MD including funcs to generate initial HMC momentum
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid{
|
|
||||||
namespace QCD{
|
|
||||||
|
|
||||||
void MDutils::generate_momenta(LatticeLorentzColourMatrix& P,GridParallelRNG& pRNG){
|
|
||||||
// for future support of different groups
|
|
||||||
MDutils::generate_momenta_su3(P, pRNG);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MDutils::generate_momenta_su3(LatticeLorentzColourMatrix& P,GridParallelRNG& pRNG){
|
|
||||||
LatticeColourMatrix Pmu(P._grid);
|
|
||||||
Pmu = zero;
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
SU3::GaussianLieAlgebraMatrix(pRNG, Pmu);
|
|
||||||
PokeIndex<LorentzIndex>(P, Pmu, mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
@ -10,143 +10,193 @@
|
|||||||
#ifndef INTEGRATOR_INCLUDED
|
#ifndef INTEGRATOR_INCLUDED
|
||||||
#define INTEGRATOR_INCLUDED
|
#define INTEGRATOR_INCLUDED
|
||||||
|
|
||||||
class Observer;
|
//class Observer;
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
namespace Grid{
|
namespace Grid{
|
||||||
namespace QCD{
|
namespace QCD{
|
||||||
|
|
||||||
typedef Action<LatticeGaugeField>* ActPtr; // now force the same colours as the rest of the code
|
|
||||||
struct ActionLevel{
|
|
||||||
int multiplier;
|
|
||||||
public:
|
|
||||||
std::vector<ActPtr> actions;
|
|
||||||
explicit ActionLevel(int mul = 1):multiplier(mul){assert (mul > 0);};
|
|
||||||
void push_back(ActPtr ptr){
|
|
||||||
actions.push_back(ptr);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
typedef std::vector<ActionLevel> ActionSet;
|
|
||||||
typedef std::vector<Observer*> ObserverList;
|
|
||||||
|
|
||||||
struct IntegratorParameters{
|
struct IntegratorParameters{
|
||||||
|
|
||||||
int Nexp;
|
int Nexp;
|
||||||
int MDsteps; // number of outer steps
|
int MDsteps; // number of outer steps
|
||||||
RealD trajL; // trajectory length
|
RealD trajL; // trajectory length
|
||||||
RealD stepsize;
|
RealD stepsize;
|
||||||
|
|
||||||
IntegratorParameters(int Nexp_,
|
IntegratorParameters(int MDsteps_,
|
||||||
int MDsteps_,
|
RealD trajL_=1.0,
|
||||||
RealD trajL_):
|
int Nexp_=12):
|
||||||
Nexp(Nexp_),MDsteps(MDsteps_),trajL(trajL_),stepsize(trajL/MDsteps){};
|
Nexp(Nexp_),
|
||||||
|
MDsteps(MDsteps_),
|
||||||
|
trajL(trajL_),
|
||||||
|
stepsize(trajL/MDsteps)
|
||||||
|
{
|
||||||
|
// empty body constructor
|
||||||
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
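The reordered constructor makes the defaults explicit (trajL = 1.0, Nexp = 12) and keeps the step-size relation used throughout the integrator,

\varepsilon \;=\; \frac{\tau}{N_{\rm MD}}, \qquad \tau = \mathrm{trajL},\; N_{\rm MD} = \mathrm{MDsteps}.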
namespace MDutils{
|
|
||||||
void generate_momenta(LatticeGaugeField&,GridParallelRNG&);
|
|
||||||
void generate_momenta_su3(LatticeGaugeField&,GridParallelRNG&);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*! @brief Class for Molecular Dynamics management */
|
/*! @brief Class for Molecular Dynamics management */
|
||||||
template< class IntegratorAlgorithm >
|
template<class GaugeField>
|
||||||
class Integrator{
|
class Integrator {
|
||||||
private:
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
typedef IntegratorParameters ParameterType;
|
||||||
|
|
||||||
IntegratorParameters Params;
|
IntegratorParameters Params;
|
||||||
const ActionSet as;
|
|
||||||
std::unique_ptr<LatticeGaugeField> P;
|
|
||||||
GridParallelRNG pRNG;
|
|
||||||
//ObserverList observers; // not yet
|
|
||||||
|
|
||||||
IntegratorAlgorithm TheIntegrator;
|
|
||||||
|
|
||||||
void register_observers();
|
const ActionSet<GaugeField> as;
|
||||||
void notify_observers();
|
|
||||||
|
|
||||||
void update_P(LatticeGaugeField&U, int level,double ep){
|
int levels; //
|
||||||
for(int a=0; a<as[level].actions.size(); ++a){
|
double t_U; // Track time passing on each level and for U and for P
|
||||||
LatticeGaugeField force(U._grid);
|
std::vector<double> t_P; //
|
||||||
as[level].actions.at(a)->deriv(U,force);
|
|
||||||
*P -= force*ep;
|
GaugeField P;
|
||||||
|
|
||||||
|
// Should match any legal (SU(n)) gauge field
|
||||||
|
// Need to use this template to match Ncol to pass to SU<N> class
|
||||||
|
template<int Ncol,class vec> void generate_momenta(Lattice< iVector< iScalar< iMatrix<vec,Ncol> >, Nd> > & P,GridParallelRNG& pRNG){
|
||||||
|
typedef Lattice< iScalar< iScalar< iMatrix<vec,Ncol> > > > GaugeLinkField;
|
||||||
|
GaugeLinkField Pmu(P._grid);
|
||||||
|
Pmu = zero;
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
SU<Ncol>::GaussianLieAlgebraMatrix(pRNG, Pmu);
|
||||||
|
PokeIndex<LorentzIndex>(P, Pmu, mu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_U(LatticeGaugeField&U, double ep){
|
|
||||||
|
//ObserverList observers; // not yet
|
||||||
|
// typedef std::vector<Observer*> ObserverList;
|
||||||
|
// void register_observers();
|
||||||
|
// void notify_observers();
|
||||||
|
|
||||||
|
void update_P(GaugeField&U, int level,double ep){
|
||||||
|
t_P[level]+=ep;
|
||||||
|
update_P(P,U,level,ep);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage;
|
||||||
|
for(int l=0; l<level;++l) std::cout<<" ";
|
||||||
|
std::cout<<"["<<level<<"] P " << " dt "<< ep <<" : t_P "<< t_P[level] <<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void update_P(GaugeField &Mom,GaugeField&U, int level,double ep){
|
||||||
|
for(int a=0; a<as[level].actions.size(); ++a){
|
||||||
|
GaugeField force(U._grid);
|
||||||
|
as[level].actions.at(a)->deriv(U,force);
|
||||||
|
Mom = Mom - force*ep;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void update_U(GaugeField&U, double ep){
|
||||||
|
update_U(P,U,ep);
|
||||||
|
|
||||||
|
t_U+=ep;
|
||||||
|
int fl = levels-1;
|
||||||
|
std::cout<<GridLogMessage<<" ";
|
||||||
|
for(int l=0; l<fl;++l) std::cout<<" ";
|
||||||
|
std::cout<<"["<<fl<<"] U " << " dt "<< ep <<" : t_U "<< t_U <<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
void update_U(GaugeField &Mom, GaugeField&U, double ep){
|
||||||
//rewrite exponential to deal automatically with the lorentz index?
|
//rewrite exponential to deal automatically with the lorentz index?
|
||||||
LatticeColourMatrix Umu(U._grid);
|
// GaugeLinkField Umu(U._grid);
|
||||||
LatticeColourMatrix Pmu(U._grid);
|
// GaugeLinkField Pmu(U._grid);
|
||||||
for (int mu = 0; mu < Nd; mu++){
|
for (int mu = 0; mu < Nd; mu++){
|
||||||
Umu=PeekIndex<LorentzIndex>(U, mu);
|
auto Umu=PeekIndex<LorentzIndex>(U, mu);
|
||||||
Pmu=PeekIndex<LorentzIndex>(*P, mu);
|
auto Pmu=PeekIndex<LorentzIndex>(Mom, mu);
|
||||||
Umu = expMat(Pmu, ep, Params.Nexp)*Umu;
|
Umu = expMat(Pmu, ep, Params.Nexp)*Umu;
|
||||||
|
ProjectOnGroup(Umu);
|
||||||
PokeIndex<LorentzIndex>(U, Umu, mu);
|
PokeIndex<LorentzIndex>(U, Umu, mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void step (GaugeField& U,int level, int first,int last)=0;
|
||||||
friend void IntegratorAlgorithm::step (LatticeGaugeField& U,
|
|
||||||
int level, std::vector<int>& clock,
|
|
||||||
Integrator<IntegratorAlgorithm>* Integ);
|
|
||||||
public:
|
public:
|
||||||
Integrator(GridBase* grid, IntegratorParameters Par,
|
|
||||||
ActionSet& Aset):
|
Integrator(GridBase* grid,
|
||||||
Params(Par),as(Aset),P(new LatticeGaugeField(grid)),pRNG(grid){
|
IntegratorParameters Par,
|
||||||
pRNG.SeedRandomDevice();
|
ActionSet<GaugeField> & Aset):
|
||||||
|
Params(Par),
|
||||||
|
as(Aset),
|
||||||
|
P(grid),
|
||||||
|
levels(Aset.size())
|
||||||
|
{
|
||||||
|
t_P.resize(levels,0.0);
|
||||||
|
t_U=0.0;
|
||||||
};
|
};
|
||||||
|
|
||||||
~Integrator(){}
|
virtual ~Integrator(){}
|
||||||
|
|
||||||
//Initialization of momenta and actions
|
//Initialization of momenta and actions
|
||||||
void init(LatticeGaugeField& U){
|
void refresh(GaugeField& U,GridParallelRNG &pRNG){
|
||||||
std::cout<<GridLogMessage<< "Integrator init\n";
|
std::cout<<GridLogMessage<< "Integrator refresh\n";
|
||||||
MDutils::generate_momenta(*P,pRNG);
|
generate_momenta(P,pRNG);
|
||||||
for(int level=0; level< as.size(); ++level){
|
for(int level=0; level< as.size(); ++level){
|
||||||
for(int actionID=0; actionID<as[level].actions.size(); ++actionID){
|
for(int actionID=0; actionID<as[level].actions.size(); ++actionID){
|
||||||
as[level].actions.at(actionID)->init(U, pRNG);
|
as[level].actions.at(actionID)->refresh(U, pRNG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate action
|
// Calculate action
|
||||||
RealD S(LatticeGaugeField& U){
|
RealD S(GaugeField& U){
|
||||||
LatticeComplex Hloc(U._grid);
|
|
||||||
Hloc = zero;
|
LatticeComplex Hloc(U._grid); Hloc = zero;
|
||||||
// Momenta
|
// Momenta
|
||||||
for (int mu=0; mu <Nd; mu++){
|
for (int mu=0; mu <Nd; mu++){
|
||||||
LatticeColourMatrix Pmu = peekLorentz(*P, mu);
|
auto Pmu = PeekIndex<LorentzIndex>(P, mu);
|
||||||
Hloc -= trace(Pmu*Pmu);
|
Hloc -= trace(Pmu*Pmu);
|
||||||
}
|
}
|
||||||
Complex Hsum = sum(Hloc);
|
Complex Hsum = sum(Hloc);
|
||||||
|
|
||||||
RealD H = Hsum.real();
|
RealD H = Hsum.real();
|
||||||
|
RealD Hterm;
|
||||||
std::cout<<GridLogMessage << "Momentum action H_p = "<< H << "\n";
|
std::cout<<GridLogMessage << "Momentum action H_p = "<< H << "\n";
|
||||||
|
|
||||||
// Actions
|
// Actions
|
||||||
for(int level=0; level<as.size(); ++level)
|
for(int level=0; level<as.size(); ++level){
|
||||||
for(int actionID=0; actionID<as[level].actions.size(); ++actionID)
|
for(int actionID=0; actionID<as[level].actions.size(); ++actionID){
|
||||||
H += as[level].actions.at(actionID)->S(U);
|
Hterm = as[level].actions.at(actionID)->S(U);
|
||||||
|
std::cout<<GridLogMessage << "Level "<<level<<" term "<<actionID<<" H = "<<Hterm<<std::endl;
|
||||||
|
H += Hterm;
|
||||||
|
}
|
||||||
|
}
|
||||||
std::cout<<GridLogMessage << "Total action H = "<< H << "\n";
|
std::cout<<GridLogMessage << "Total action H = "<< H << "\n";
|
||||||
|
|
||||||
return H;
|
return H;
|
||||||
}
|
}
|
||||||
|
|
||||||
void integrate(LatticeGaugeField& U){
|
void integrate(GaugeField& U){
|
||||||
std::vector<int> clock;
|
|
||||||
clock.resize(as.size(),0);
|
// reset the clocks
|
||||||
for(int step=0; step< Params.MDsteps; ++step) // MD step
|
t_U=0;
|
||||||
TheIntegrator.step(U,0,clock, (this));
|
for(int level=0; level<as.size(); ++level){
|
||||||
|
t_P[level]=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int step=0; step< Params.MDsteps; ++step){ // MD step
|
||||||
|
int first_step = (step==0);
|
||||||
|
int last_step = (step==Params.MDsteps-1);
|
||||||
|
this->step(U,0,first_step,last_step);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the clocks all match on all levels
|
||||||
|
for(int level=0; level<as.size(); ++level){
|
||||||
|
assert(fabs(t_U - t_P[level])<1.0e-6); // must be the same
|
||||||
|
std::cout<<GridLogMessage<<" times["<<level<<"]= "<<t_P[level]<< " " << t_U <<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// and that we indeed got to the end of the trajectory
|
||||||
|
assert(fabs(t_U-Params.trajL) < 1.0e-6);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif//INTEGRATOR_INCLUDED
|
#endif//INTEGRATOR_INCLUDED
|
||||||
|
@ -9,138 +9,236 @@
|
|||||||
#ifndef INTEGRATOR_ALG_INCLUDED
|
#ifndef INTEGRATOR_ALG_INCLUDED
|
||||||
#define INTEGRATOR_ALG_INCLUDED
|
#define INTEGRATOR_ALG_INCLUDED
|
||||||
|
|
||||||
|
|
||||||
namespace Grid{
|
namespace Grid{
|
||||||
namespace QCD{
|
namespace QCD{
|
||||||
|
|
||||||
|
/* PAB:
|
||||||
|
*
|
||||||
|
* Recursive leapfrog; explanation of nested stepping
|
||||||
|
*
|
||||||
|
* Nested 1:4; units in dt for top level integrator
|
||||||
|
*
|
||||||
|
* CHROMA IroIro
|
||||||
|
* 0 1 0
|
||||||
|
* P 1/2 P 1/2
|
||||||
|
* P 1/16 P1/16
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/16 P1/8
|
||||||
|
* P 1 P 1
|
||||||
|
* P 1/16 * skipped --- avoids re-evaluating force
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/16 P1/8
|
||||||
|
* P 1 P 1
|
||||||
|
* P 1/16 * skipped
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/16 * skipped
|
||||||
|
* P 1 P 1
|
||||||
|
* P 1/16 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/8 P1/8
|
||||||
|
* U 1/8 U1/8
|
||||||
|
* P 1/16 P1/16
|
||||||
|
* P 1/2 P 1/2
|
||||||
|
*/
|
||||||
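The step-size bookkeeping behind the two schedules above can be checked with a short standalone calculation. This is an illustration only, not part of the patch; the trajectory length, step count and level multipliers are made-up values, and the recursion simply mirrors the per-level eps /= multiplier division performed in the LeapFrog step routine below.

#include <cstdio>

int main(void) {
  // Assumed example values (not from the patch): trajL=2.0, MDsteps=4,
  // two integration levels with multipliers {1,4}.
  double trajL = 2.0;
  int    MDsteps = 4;
  int    multiplier[2] = {1, 4};

  double stepsize = trajL/MDsteps;

  // Leapfrog: the step used at level l is stepsize divided by the
  // multipliers of every level down to and including l.
  double eps = stepsize;
  for (int l = 0; l < 2; l++) {
    eps /= multiplier[l];
    std::printf("level %d : eps = %g\n", l, eps);   // 0.5 then 0.125
  }

  // The innermost level therefore performs MDsteps*multiplier[0]*multiplier[1]
  // = 16 gauge-field updates of size 0.125, which add up to trajL = 2.0.
  return 0;
}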
|
|
||||||
class MinimumNorm2{
|
template<class GaugeField> class LeapFrog : public Integrator<GaugeField> {
|
||||||
const double lambda = 0.1931833275037836;
|
|
||||||
public:
|
public:
|
||||||
void step (LatticeLorentzColourMatrix& U,
|
|
||||||
int level, std::vector<int>& clock,
|
typedef LeapFrog<GaugeField> Algorithm;
|
||||||
Integrator<MinimumNorm2>* Integ){
|
|
||||||
|
LeapFrog(GridBase* grid,
|
||||||
|
IntegratorParameters Par,
|
||||||
|
ActionSet<GaugeField> & Aset): Integrator<GaugeField>(grid,Par,Aset) {};
|
||||||
|
|
||||||
|
|
||||||
|
void step (GaugeField& U, int level,int _first, int _last){
|
||||||
|
|
||||||
|
int fl = this->as.size() -1;
|
||||||
// level : current level
|
// level : current level
|
||||||
// fl : final level
|
// fl : final level
|
||||||
// eps : current step size
|
// eps : current step size
|
||||||
|
|
||||||
int fl = Integ->as.size() -1;
|
|
||||||
double eps = Integ->Params.stepsize;
|
|
||||||
|
|
||||||
for(int l=0; l<=level; ++l) eps/= 2.0*Integ->as[l].multiplier;
|
|
||||||
|
|
||||||
int fin = Integ->as[0].multiplier;
|
|
||||||
for(int l=1; l<=level; ++l) fin*= 2.0*Integ->as[l].multiplier;
|
|
||||||
fin = 3*Integ->Params.MDsteps*fin -1;
|
|
||||||
|
|
||||||
for(int e=0; e<Integ->as[level].multiplier; ++e){
|
|
||||||
if(clock[level] == 0){ // initial half step
|
|
||||||
Integ->update_P(U,level,lambda*eps);
|
|
||||||
++clock[level];
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"P "<< clock[level] <<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(level == fl){ // lowest level
|
|
||||||
Integ->update_U(U,0.5*eps);
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"U "<< (clock[level]+1) <<std::endl;
|
|
||||||
}else{ // recursive function call
|
|
||||||
step(U,level+1,clock, Integ);
|
|
||||||
}
|
|
||||||
|
|
||||||
Integ->update_P(U,level,(1.0-2.0*lambda)*eps);
|
|
||||||
++clock[level];
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"P "<< (clock[level]) <<std::endl;
|
|
||||||
|
|
||||||
if(level == fl){ // lowest level
|
|
||||||
Integ->update_U(U,0.5*eps);
|
|
||||||
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"U "<< (clock[level]+1) <<std::endl;
|
|
||||||
}else{ // recursive function call
|
|
||||||
step(U,level+1,clock, Integ);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if(clock[level] == fin){ // final half step
|
|
||||||
Integ->update_P(U,level,lambda*eps);
|
|
||||||
|
|
||||||
++clock[level];
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"P "<< clock[level] <<std::endl;
|
|
||||||
}else{ // bulk step
|
|
||||||
Integ->update_P(U,level,lambda*2.0*eps);
|
|
||||||
|
|
||||||
clock[level]+=2;
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"P "<< clock[level] <<std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
class LeapFrog{
|
|
||||||
public:
|
|
||||||
void step (LatticeLorentzColourMatrix& U,
|
|
||||||
int level, std::vector<int>& clock,
|
|
||||||
Integrator<LeapFrog>* Integ){
|
|
||||||
|
|
||||||
// level : current level
|
|
||||||
// fl : final level
|
|
||||||
// eps : current step size
|
|
||||||
|
|
||||||
int fl = Integ->as.size() -1;
|
|
||||||
double eps = Integ->Params.stepsize;
|
|
||||||
|
|
||||||
// Get current level step size
|
// Get current level step size
|
||||||
for(int l=0; l<=level; ++l) eps/= Integ->as[l].multiplier;
|
RealD eps = this->Params.stepsize;
|
||||||
|
for(int l=0; l<=level; ++l) eps/= this->as[l].multiplier;
|
||||||
|
|
||||||
int fin = 1;
|
int multiplier = this->as[level].multiplier;
|
||||||
for(int l=0; l<=level; ++l) fin*= Integ->as[l].multiplier;
|
for(int e=0; e<multiplier; ++e){
|
||||||
fin = 2*Integ->Params.MDsteps*fin - 1;
|
|
||||||
|
int first_step = _first && (e==0);
|
||||||
for(int e=0; e<Integ->as[level].multiplier; ++e){
|
int last_step = _last && (e==multiplier-1);
|
||||||
|
|
||||||
if(clock[level] == 0){ // initial half step
|
if(first_step){ // initial half step
|
||||||
Integ->update_P(U, level,eps/2.0);
|
this->update_P(U, level,eps/2.0);
|
||||||
++clock[level];
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"P "<< 0.5*clock[level] <<std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(level == fl){ // lowest level
|
if(level == fl){ // lowest level
|
||||||
Integ->update_U(U, eps);
|
this->update_U(U, eps);
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"U "<< 0.5*(clock[level]+1) <<std::endl;
|
|
||||||
}else{ // recursive function call
|
}else{ // recursive function call
|
||||||
step(U, level+1,clock, Integ);
|
this->step(U, level+1,first_step,last_step);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(clock[level] == fin){ // final half step
|
int mm = last_step ? 1 : 2;
|
||||||
Integ->update_P(U, level,eps/2.0);
|
this->update_P(U, level,mm*eps/2.0);
|
||||||
++clock[level];
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"P "<< 0.5*clock[level] <<std::endl;
|
|
||||||
}else{ // bulk step
|
|
||||||
Integ->update_P(U, level,eps);
|
|
||||||
clock[level]+=2;
|
|
||||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
|
||||||
std::cout<<GridLogMessage<<"P "<< 0.5*clock[level] <<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
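In the step routine above, `int mm = last_step ? 1 : 2;` fuses the closing half momentum kick of one leapfrog iteration with the opening half kick of the next, so the force is evaluated once per iteration rather than twice. A minimal standalone sketch of the same pattern for a one-dimensional harmonic oscillator (illustration only, not Grid code):

#include <cstdio>

// One-dimensional leapfrog with fused half kicks: the force is -x.
int main(void) {
  double x = 1.0, p = 0.0;
  const double eps = 0.1;
  const int nsteps = 100;

  for (int s = 0; s < nsteps; s++) {
    int first = (s == 0);
    int last  = (s == nsteps - 1);

    if (first) p += 0.5*eps*(-x);   // opening half kick, only once
    x += eps*p;                     // full drift
    int mm = last ? 1 : 2;          // closing half kick, or fused double kick
    p += 0.5*mm*eps*(-x);
  }
  std::printf("x=%f p=%f (energy %f)\n", x, p, 0.5*(x*x + p*p));
  return 0;
}

The fused form is algebraically identical to the textbook half-kick/drift/half-kick leapfrog; only the bookkeeping changes.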
|
|
||||||
|
template<class GaugeField> class MinimumNorm2 : public Integrator<GaugeField> {
|
||||||
|
private:
|
||||||
|
const RealD lambda = 0.1931833275037836;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
MinimumNorm2(GridBase* grid,
|
||||||
|
IntegratorParameters Par,
|
||||||
|
ActionSet<GaugeField> & Aset): Integrator<GaugeField>(grid,Par,Aset) {};
|
||||||
|
|
||||||
|
void step (GaugeField& U, int level, int _first,int _last){
|
||||||
|
|
||||||
|
// level : current level
|
||||||
|
// fl : final level
|
||||||
|
// eps : current step size
|
||||||
|
|
||||||
|
int fl = this->as.size() -1;
|
||||||
|
|
||||||
|
RealD eps = this->Params.stepsize*2.0;
|
||||||
|
for(int l=0; l<=level; ++l) eps/= 2.0*this->as[l].multiplier;
|
||||||
|
|
||||||
|
// Nesting: 2xupdate_U of size eps/2
|
||||||
|
// Next level is eps/2/multiplier
|
||||||
|
|
||||||
|
int multiplier = this->as[level].multiplier;
|
||||||
|
for(int e=0; e<multiplier; ++e){ // steps per step
|
||||||
|
|
||||||
|
int first_step = _first && (e==0);
|
||||||
|
int last_step = _last && (e==multiplier-1);
|
||||||
|
|
||||||
|
if(first_step){ // initial half step
|
||||||
|
this->update_P(U,level,lambda*eps);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(level == fl){ // lowest level
|
||||||
|
this->update_U(U,0.5*eps);
|
||||||
|
}else{ // recursive function call
|
||||||
|
this->step(U,level+1,first_step,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->update_P(U,level,(1.0-2.0*lambda)*eps);
|
||||||
|
|
||||||
|
if(level == fl){ // lowest level
|
||||||
|
this->update_U(U,0.5*eps);
|
||||||
|
}else{ // recursive function call
|
||||||
|
this->step(U,level+1,0,last_step);
|
||||||
|
}
|
||||||
|
|
||||||
|
int mm = (last_step) ? 1 : 2;
|
||||||
|
this->update_P(U,level,lambda*eps*mm);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
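Spelled out, one elementary step of the minimum-norm-2 scheme above applies (my transcription of the update_P/update_U calls, where F[U] is the force produced by deriv and the U update is the exponential map used in update_U):

P \to P - \lambda\,\varepsilon\,F[U],\quad
U \to e^{(\varepsilon/2)P}\,U,\quad
P \to P - (1-2\lambda)\,\varepsilon\,F[U],\quad
U \to e^{(\varepsilon/2)P}\,U,\quad
P \to P - \lambda\,\varepsilon\,F[U],
\qquad \lambda = 0.1931833275037836

On multi-level runs each U half-step recurses into the next level instead, and the trailing and leading lambda*eps kicks of neighbouring iterations are fused into one 2*lambda*eps kick (the mm factor), exactly as in the leapfrog case.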
|
|
||||||
|
|
||||||
|
template<class GaugeField> class ForceGradient : public Integrator<GaugeField> {
|
||||||
|
private:
|
||||||
|
const RealD lambda = 1.0/6.0;
|
||||||
|
const RealD chi = 1.0/72.0;
|
||||||
|
const RealD xi = 0.0;
|
||||||
|
const RealD theta = 0.0;
|
||||||
|
public:
|
||||||
|
|
||||||
|
// Looks like dH scales as dt^4; tested wilson/wilson at 2 levels.
|
||||||
|
ForceGradient(GridBase* grid,
|
||||||
|
IntegratorParameters Par,
|
||||||
|
ActionSet<GaugeField> & Aset): Integrator<GaugeField>(grid,Par,Aset) {};
|
||||||
|
|
||||||
|
|
||||||
|
void FG_update_P(GaugeField&U, int level,double fg_dt,double ep){
|
||||||
|
GaugeField Ufg(U._grid);
|
||||||
|
GaugeField Pfg(U._grid);
|
||||||
|
Ufg = U;
|
||||||
|
Pfg = zero;
|
||||||
|
std::cout << GridLogMessage << "FG update "<<fg_dt<<" "<<ep<<std::endl;
|
||||||
|
// prepare_fg; no prediction/result cache for now
|
||||||
|
// could relax CG stopping conditions for the
|
||||||
|
// derivatives in the small step since the force gets multiplied by
|
||||||
|
// a tiny dt^2 term relative to main force.
|
||||||
|
//
|
||||||
|
// Presently 4 force evals, and should have 3, so 1.33x too expensive.
|
||||||
|
// could reduce this with sloppy CG to perhaps 1.15x too expensive
|
||||||
|
// even without prediction.
|
||||||
|
this->update_P(Pfg,Ufg,level,1.0);
|
||||||
|
this->update_U(Pfg,Ufg,fg_dt);
|
||||||
|
this->update_P(Ufg,level,ep);
|
||||||
|
}
|
||||||
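In other words (my reading of FG_update_P above together with the call in step() below): the (1-2λ)ε momentum kick in the middle of the minimum-norm step is taken with the force evaluated not at U but at an auxiliary configuration shifted along the current force direction, which is the usual way of approximating the force-gradient correction without forming it explicitly. With the sign conventions of update_P/update_U this reads

P_{fg} = -\,F[U], \qquad
U_{fg} = e^{\chi' P_{fg}}\,U, \qquad
P \;\to\; P - (1-2\lambda)\,\varepsilon\,F[U_{fg}],
\qquad
\chi' = \frac{2\chi\,\varepsilon^{2}}{1-2\lambda},\quad \chi=\tfrac{1}{72},\ \ \lambda=\tfrac{1}{6}

where ε is the step size at this level; the extra force evaluation needed to build U_fg is the 4-vs-3 cost counted in the comment above.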
|
|
||||||
|
void step (GaugeField& U, int level, int _first,int _last){
|
||||||
|
|
||||||
|
RealD eps = this->Params.stepsize*2.0;
|
||||||
|
for(int l=0; l<=level; ++l) eps/= 2.0*this->as[l].multiplier;
|
||||||
|
|
||||||
|
RealD Chi = chi*eps*eps*eps;
|
||||||
|
|
||||||
|
int fl = this->as.size() -1;
|
||||||
|
|
||||||
|
int multiplier = this->as[level].multiplier;
|
||||||
|
|
||||||
|
for(int e=0; e<multiplier; ++e){ // steps per step
|
||||||
|
|
||||||
|
|
||||||
|
int first_step = _first && (e==0);
|
||||||
|
int last_step = _last && (e==multiplier-1);
|
||||||
|
|
||||||
|
if(first_step){ // initial half step
|
||||||
|
this->update_P(U,level,lambda*eps);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(level == fl){ // lowest level
|
||||||
|
this->update_U(U,0.5*eps);
|
||||||
|
}else{ // recursive function call
|
||||||
|
this->step(U,level+1,first_step,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->FG_update_P(U,level,2*Chi/((1.0-2.0*lambda)*eps),(1.0-2.0*lambda)*eps);
|
||||||
|
|
||||||
|
if(level == fl){ // lowest level
|
||||||
|
this->update_U(U,0.5*eps);
|
||||||
|
}else{ // recursive function call
|
||||||
|
this->step(U,level+1,0,last_step);
|
||||||
|
}
|
||||||
|
|
||||||
|
int mm = (last_step) ? 1 : 2;
|
||||||
|
this->update_P(U,level,lambda*eps*mm);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif//INTEGRATOR_INCLUDED
|
#endif//INTEGRATOR_INCLUDED
|
||||||
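For orientation, a sketch of how the pieces above are intended to be driven. This is hypothetical driver code, not part of the patch: the lattice size, gauge action, coupling and helper calls are placeholders; only the integrator calls mirror the interfaces defined in this commit.

#include <Grid.h>

using namespace Grid;
using namespace Grid::QCD;

int main (int argc, char ** argv)
{
  Grid_init(&argc, &argv);

  std::vector<int> latt({8,8,8,8});                 // placeholder volume
  GridCartesian Fine(latt, GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());

  GridParallelRNG pRNG(&Fine);  pRNG.SeedRandomDevice();

  LatticeGaugeField U(&Fine);
  SU3::HotConfiguration(pRNG, U);                   // assumed helper for a random start

  WilsonGaugeAction<LatticeGaugeField> Waction(6.0);// assumed gauge action and coupling
  ActionLevel<LatticeGaugeField> Level1;  Level1.push_back(&Waction);
  ActionSet<LatticeGaugeField>   FullSet; FullSet.push_back(Level1);

  IntegratorParameters MDpar(12, 20, 1.0);          // Nexp, MDsteps, trajL
  LeapFrog<LatticeGaugeField> MD(&Fine, MDpar, FullSet);

  MD.refresh(U, pRNG);          // Gaussian momenta + per-action refresh
  RealD H0 = MD.S(U);           // action before
  MD.integrate(U);              // one trajectory
  RealD H1 = MD.S(U);           // action after; dH = H1 - H0 is the usual check

  Grid_finalize();
}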
|
@ -524,16 +524,22 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g
|
|||||||
// reunitarise??
|
// reunitarise??
|
||||||
static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrix &out,double scale=1.0){
|
static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrix &out,double scale=1.0){
|
||||||
GridBase *grid = out._grid;
|
GridBase *grid = out._grid;
|
||||||
|
|
||||||
LatticeComplex ca (grid);
|
LatticeComplex ca (grid);
|
||||||
LatticeMatrix lie(grid);
|
LatticeMatrix lie(grid);
|
||||||
LatticeMatrix la (grid);
|
LatticeMatrix la (grid);
|
||||||
Complex ci(0.0,scale);
|
Complex ci(0.0,scale);
|
||||||
|
Complex cone(1.0,0.0);
|
||||||
Matrix ta;
|
Matrix ta;
|
||||||
|
|
||||||
lie=zero;
|
lie=zero;
|
||||||
for(int a=0;a<generators();a++){
|
for(int a=0;a<generators();a++){
|
||||||
|
|
||||||
random(pRNG,ca); ca=real(ca)-0.5;
|
random(pRNG,ca);
|
||||||
|
|
||||||
|
ca = (ca+conjugate(ca))*0.5;
|
||||||
|
ca = ca - 0.5;
|
||||||
|
|
||||||
generator(a,ta);
|
generator(a,ta);
|
||||||
|
|
||||||
la=ci*ca*ta;
|
la=ci*ca*ta;
|
||||||
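The rewritten lines draw the same real, centred coefficient as before, only through the complex conjugate rather than real():

\mathrm{Re}(c) = \tfrac{1}{2}\,(c+\bar c), \qquad
c_a = \mathrm{Re}(c)-\tfrac{1}{2} \in [-\tfrac{1}{2},\tfrac{1}{2})\ \ \text{for } c\ \text{uniform on } [0,1)

so `(ca+conjugate(ca))*0.5 - 0.5` and the old `real(ca)-0.5` produce the same Lie-algebra coefficients; presumably the conjugate form is preferred so that ca stays a LatticeComplex throughout.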
|
@ -4,9 +4,12 @@ namespace Grid {
|
|||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
// Common wilson loop observables
|
// Common wilson loop observables
|
||||||
template<class GaugeMat,class GaugeLorentz>
|
template<class GaugeLorentz>
|
||||||
class WilsonLoops {
|
class WilsonLoops {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
typedef LorentzScalar<GaugeLorentz> GaugeMat;
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// directed plaquette oriented in mu,nu plane
|
// directed plaquette oriented in mu,nu plane
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
@ -68,6 +71,22 @@ public:
|
|||||||
|
|
||||||
return sumplaq/vol/faces/Nc; // Nd , Nc dependent... FIXME
|
return sumplaq/vol/faces/Nc; // Nd , Nc dependent... FIXME
|
||||||
}
|
}
|
||||||
|
static RealD linkTrace(const GaugeLorentz &Umu){
|
||||||
|
std::vector<GaugeMat> U(4,Umu._grid);
|
||||||
|
|
||||||
|
LatticeComplex Tr(Umu._grid); Tr=zero;
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
Tr = Tr+trace(U[mu]);
|
||||||
|
}
|
||||||
|
|
||||||
|
TComplex Tp = sum(Tr);
|
||||||
|
Complex p = TensorRemove(Tp);
|
||||||
|
|
||||||
|
double vol = Umu._grid->gSites();
|
||||||
|
|
||||||
|
return p.real()/vol/4.0/3.0;
|
||||||
|
};
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// the sum over all staples on each site
|
// the sum over all staples on each site
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
@ -125,10 +144,10 @@ void siteRectangle(GaugeMat &plaq,const std::vector<GaugeMat> &U, const int mu,
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> ColourWilsonLoops;
|
typedef WilsonLoops<LatticeGaugeField> ColourWilsonLoops;
|
||||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> U1WilsonLoops;
|
typedef WilsonLoops<LatticeGaugeField> U1WilsonLoops;
|
||||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> SU2WilsonLoops;
|
typedef WilsonLoops<LatticeGaugeField> SU2WilsonLoops;
|
||||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> SU3WilsonLoops;
|
typedef WilsonLoops<LatticeGaugeField> SU3WilsonLoops;
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
49
lib/serialisation/BaseIO.h
Normal file
49
lib/serialisation/BaseIO.h
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H
|
||||||
|
#define GRID_SERIALISATION_ABSTRACT_READER_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
class Writer {
|
||||||
|
public:
|
||||||
|
|
||||||
|
virtual ~Writer() {};
|
||||||
|
|
||||||
|
virtual void push(const std::string &s) = 0;
|
||||||
|
virtual void pop(void) =0;
|
||||||
|
virtual void write( const std::string& s,const std::string &output ) =0;
|
||||||
|
virtual void write( const std::string& s,const int16_t output ) =0;
|
||||||
|
virtual void write( const std::string& s,const uint16_t output ) =0;
|
||||||
|
virtual void write( const std::string& s,const int32_t output ) =0;
|
||||||
|
virtual void write( const std::string& s,const uint32_t output ) =0;
|
||||||
|
virtual void write( const std::string& s,const int64_t output ) =0;
|
||||||
|
virtual void write( const std::string& s,const uint64_t output ) =0;
|
||||||
|
virtual void write( const std::string& s,const float output ) =0;
|
||||||
|
virtual void write( const std::string& s,const double output ) =0;
|
||||||
|
virtual void write( const std::string& s,const bool output ) =0;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class Reader {
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
virtual ~Reader() {};
|
||||||
|
|
||||||
|
virtual void read( const std::string& s,std::string &output ) =0;
|
||||||
|
virtual void push(const std::string &s) =0;
|
||||||
|
virtual void pop(void) = 0;
|
||||||
|
|
||||||
|
virtual void read( const std::string& s, int16_t &output ) =0;
|
||||||
|
virtual void read( const std::string& s, uint16_t &output ) =0;
|
||||||
|
virtual void read( const std::string& s, int32_t &output ) =0;
|
||||||
|
virtual void read( const std::string& s, uint32_t &output ) =0;
|
||||||
|
virtual void read( const std::string& s, int64_t &output ) =0;
|
||||||
|
virtual void read( const std::string& s, uint64_t &output ) =0;
|
||||||
|
virtual void read( const std::string& s, float &output ) =0;
|
||||||
|
virtual void read( const std::string& s, double &output ) =0;
|
||||||
|
virtual void read( const std::string& s, bool &output ) =0;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
109
lib/serialisation/BinaryIO.h
Normal file
109
lib/serialisation/BinaryIO.h
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#ifndef GRID_SERIALISATION_BINARY_READER_H
|
||||||
|
#define GRID_SERIALISATION_BINARY_READER_H
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <math.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
class BinaryWriter : public Writer {
|
||||||
|
private:
|
||||||
|
|
||||||
|
std::ofstream file;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
BinaryWriter(const std::string &_file) : file(_file,std::ios::binary|std::ios::out) {}
|
||||||
|
|
||||||
|
~BinaryWriter() {}
|
||||||
|
|
||||||
|
// Binary is scopeless
|
||||||
|
void push(const std::string &s) {}
|
||||||
|
void pop(void) {}
|
||||||
|
|
||||||
|
void write(const std::string& s,const std::string &output) {
|
||||||
|
uint32_t sz = output.size();
|
||||||
|
write(s,sz);
|
||||||
|
const char * cstr = output.c_str();
|
||||||
|
for(int c=0;c<output.size();c++){
|
||||||
|
write(s,cstr[c]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
void write( const std::string& s,const char output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const int16_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint16_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const int32_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint32_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const int64_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint64_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const float output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const double output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const bool output ) { writeInternal(s,output); };
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<class T> void writeInternal( const std::string& s,const T output ){
|
||||||
|
// FIXME --- htons, htonl, hton64 etc..
|
||||||
|
file.write((char *)&output,sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class BinaryReader : public Reader{
|
||||||
|
private:
|
||||||
|
|
||||||
|
std::ifstream file;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
BinaryReader(const std::string &_file) : file(_file,std::ios::binary|std::ios::in) {}
|
||||||
|
|
||||||
|
~BinaryReader() {}
|
||||||
|
|
||||||
|
// Binary is scopeless
|
||||||
|
void push(const std::string &s) { }
|
||||||
|
void pop(void) { }
|
||||||
|
|
||||||
|
void read( const std::string& s,std::string &output ) {
|
||||||
|
|
||||||
|
output.clear();
|
||||||
|
|
||||||
|
uint32_t sz;
|
||||||
|
file.read((char *)&sz,sizeof(sz));
|
||||||
|
|
||||||
|
for(int c=0;c<sz;c++){
|
||||||
|
char ch;
|
||||||
|
file.read(&ch,sizeof(ch));
|
||||||
|
output.push_back(ch);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
void read( const std::string& s, int16_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint16_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, int32_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint32_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, int64_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint64_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, float &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, double &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, bool &output ) { readInternal(s,output); };
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
template<class T> void readInternal( const std::string& path, T &output ){
|
||||||
|
file.read((char *)&output,sizeof(output)); // byte order??
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -1,6 +1,47 @@
|
|||||||
#ifndef GRID_MACRO_MAGIC_H
|
#ifndef GRID_MACRO_MAGIC_H
|
||||||
#define GRID_MACRO_MAGIC_H
|
#define GRID_MACRO_MAGIC_H
|
||||||
|
|
||||||
|
///////////////////////////////////////////
|
||||||
|
// Strong credit to :
|
||||||
|
//
|
||||||
|
// http://jhnet.co.uk/articles/cpp_magic
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// "The C Pre-Processor (CPP) is the somewhat basic macro system used by the C
|
||||||
|
// programming language to implement features such as #include and #define
|
||||||
|
// which allow very simple text-substitutions to be carried out at compile time.
|
||||||
|
// In this article we abuse the humble #define to implement if-statements and iteration.
|
||||||
|
//
|
||||||
|
// Before we begin, a disclaimer: these tricks, while perfectly valid C, should not be
|
||||||
|
// considered good development practice and should almost certainly not be used for "real work".
|
||||||
|
// That said it can totally be used for fun home-automation projects...
|
||||||
|
//
|
||||||
|
// https://github.com/18sg/uSHET/blob/master/lib/cpp_magic.h
|
||||||
|
//
|
||||||
|
// The cpp_magic.h license (prior to my modifications):
|
||||||
|
/*
|
||||||
|
Copyright (c) 2014 Thomas Nixon, Jonathan Heathcote
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
///////////////////////////////////////////
|
||||||
|
|
||||||
#define strong_inline __attribute__((always_inline)) inline
|
#define strong_inline __attribute__((always_inline)) inline
|
||||||
|
|
||||||
#ifndef MAX
|
#ifndef MAX
|
||||||
@ -63,9 +104,15 @@
|
|||||||
|
|
||||||
#define _GRID_MACRO_MAP() GRID_MACRO_MAP
|
#define _GRID_MACRO_MAP() GRID_MACRO_MAP
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// These are the Grid extensions to serialisation (beyond popping first AND second)
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define GRID_MACRO_MEMBER(A,B) A B;
|
#define GRID_MACRO_MEMBER(A,B) A B;
|
||||||
|
|
||||||
#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. B <<" ; " <<std::endl;
|
#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. B <<" ; " <<std::endl;
|
||||||
|
#define GRID_MACRO_READ_MEMBER(A,B) read(RD,#B,obj. B);
|
||||||
|
#define GRID_MACRO_WRITE_MEMBER(A,B) write(WR,#B,obj. B);
|
||||||
|
|
||||||
#define GRID_DECL_CLASS_MEMBERS(cname,...) \
|
#define GRID_DECL_CLASS_MEMBERS(cname,...) \
|
||||||
\
|
\
|
||||||
@ -73,11 +120,26 @@
|
|||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__)) \
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__)) \
|
||||||
\
|
\
|
||||||
\
|
\
|
||||||
friend std::ostream & operator << (std::ostream &os, const cname &obj ) { \
|
friend void write(Writer &WR,const std::string &s, const cname &obj){ \
|
||||||
|
push(WR,s);\
|
||||||
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
|
||||||
|
pop(WR);\
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
\
|
||||||
|
friend void read(Reader &RD,const std::string &s, cname &obj){ \
|
||||||
|
push(RD,s);\
|
||||||
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \
|
||||||
|
pop(RD);\
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
\
|
||||||
|
friend std::ostream & operator << (std::ostream &os, const cname &obj ) { \
|
||||||
os<<"class "<<#cname<<" {"<<std::endl;\
|
os<<"class "<<#cname<<" {"<<std::endl;\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_OS_WRITE_MEMBER,__VA_ARGS__)) \
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_OS_WRITE_MEMBER,__VA_ARGS__)) \
|
||||||
os<<"}"; \
|
os<<"}"; \
|
||||||
return os;\
|
return os;\
|
||||||
};
|
};
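As a usage sketch of these macros together with the Writer/Reader interface (hypothetical class name and members; it assumes the serialisation headers in this commit are included):

// Hypothetical user code: one macro call declares the members plus the
// friend read(), write() and operator<< defined above.
class MyParameters {
public:
  GRID_DECL_CLASS_MEMBERS(MyParameters,
                          int,         Ls,
                          double,      mass,
                          std::string, label);
};

// Writing and reading it back through any concrete Writer/Reader:
//   MyParameters p;         p.Ls = 16; p.mass = 0.01; p.label = "test";
//   XMLWriter WR("p.xml");  write(WR, "params", p);
//   MyParameters q;
//   XMLReader  RD("p.xml"); read (RD, "params", q);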
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
103
lib/serialisation/Serialisation.h
Normal file
103
lib/serialisation/Serialisation.h
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
#ifndef GRID_SERIALISATION_READER_H
|
||||||
|
#define GRID_SERIALISATION_READER_H
|
||||||
|
|
||||||
|
#include <serialisation/MacroMagic.h>
|
||||||
|
#include <serialisation/BaseIO.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
|
// Generic reader writer interface
|
||||||
|
inline void push(Writer & WR,const std::string &s) { WR.push(s);}
|
||||||
|
inline void push(Writer & WR,const char *s) { WR.push(std::string(s));}
|
||||||
|
inline void pop (Writer & WR) { WR.pop();}
|
||||||
|
|
||||||
|
// inline void write(Writer& wr, const std::string& s,const char * output ) { wr.write(s,std::string(output)); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const std::string &output) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const int16_t output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const uint16_t output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const int32_t output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const uint32_t output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const int64_t output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const uint64_t output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const float output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const double output ) { wr.write(s,output); };
|
||||||
|
inline void write(Writer& wr, const std::string& s,const bool output ) { wr.write(s,output); };
|
||||||
|
|
||||||
|
inline void push(Reader & WR,const std::string &s) { WR.push(s);}
|
||||||
|
inline void push(Reader & WR,const char *s) { WR.push(std::string(s));}
|
||||||
|
inline void pop (Reader & WR) { WR.pop();}
|
||||||
|
|
||||||
|
inline void read(Reader& rd, const std::string& s,std::string &output) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, int16_t &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, uint16_t &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, int32_t &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, uint32_t &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, int64_t &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, uint64_t &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, float &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, double &output ) { rd.read(s,output); };
|
||||||
|
inline void read(Reader& rd, const std::string& s, bool &output ) { rd.read(s,output); };
|
||||||
|
|
||||||
|
|
||||||
|
template<class T> void write(Writer& wr, const std::string& s,const std::vector<T> output ) {
|
||||||
|
push(wr,s);
|
||||||
|
uint64_t sz =output.size();
|
||||||
|
write(wr,"N",sz);
|
||||||
|
for(int i=0;i<output.size();i++){
|
||||||
|
std::ostringstream oss; oss << "elem" << i;
|
||||||
|
write(wr,oss.str(),output[i]);
|
||||||
|
}
|
||||||
|
pop(wr);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
void read(Reader& rd, const std::string& s,std::vector<T> &output ) {
|
||||||
|
|
||||||
|
push(rd,s);
|
||||||
|
|
||||||
|
uint64_t sz; read(rd,"N",sz);
|
||||||
|
// skip the vector length
|
||||||
|
T tmp;
|
||||||
|
output.resize(0);
|
||||||
|
for(int i=0;i<sz;i++){
|
||||||
|
std::ostringstream oss; oss << "elem" << i;
|
||||||
|
read(rd,oss.str(),tmp);
|
||||||
|
output.push_back(tmp);
|
||||||
|
}
|
||||||
|
pop(rd);
|
||||||
|
};
|
||||||
|
|
||||||
|
template < class T >
|
||||||
|
inline std::ostream& operator << (std::ostream& os, const std::vector<T>& v)
|
||||||
|
{
|
||||||
|
os << "[";
|
||||||
|
for (typename std::vector<T>::const_iterator ii = v.begin(); ii != v.end(); ++ii)
|
||||||
|
{
|
||||||
|
os << " " << *ii;
|
||||||
|
}
|
||||||
|
os << " ]";
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Todo:
|
||||||
|
//////////////////////////////////////////
|
||||||
|
#include <serialisation/BinaryIO.h>
|
||||||
|
#include <serialisation/TextIO.h>
|
||||||
|
//#include <serialisation/JsonIO.h>
|
||||||
|
//#include <serialisation/YamlIO.h>
|
||||||
|
#include <serialisation/XmlIO.h>
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Select the default serialiser use ifdef's
|
||||||
|
//////////////////////////////////////////
|
||||||
|
namespace Grid {
|
||||||
|
typedef XMLReader DefaultReader;
|
||||||
|
typedef XMLWriter DefaultWriter;
|
||||||
|
}
|
||||||
|
#endif
|
148
lib/serialisation/TextIO.h
Normal file
148
lib/serialisation/TextIO.h
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
#ifndef GRID_SERIALISATION_TEXT_READER_H
|
||||||
|
#define GRID_SERIALISATION_TEXT_READER_H
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <math.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
class TextWriter : public Writer {
|
||||||
|
private:
|
||||||
|
|
||||||
|
std::ofstream file;
|
||||||
|
int level;
|
||||||
|
void indent(void) {
|
||||||
|
for(int i=0;i<level;i++){
|
||||||
|
file <<"\t";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
|
||||||
|
TextWriter(const std::string &_file) : file(_file,std::ios::out) {
|
||||||
|
level=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
~TextWriter() { }
|
||||||
|
|
||||||
|
void push(const std::string &s)
|
||||||
|
{
|
||||||
|
// std::string tmp = s;
|
||||||
|
// write(s,tmp);
|
||||||
|
level++;
|
||||||
|
}
|
||||||
|
void pop(void) {
|
||||||
|
level--;
|
||||||
|
}
|
||||||
|
|
||||||
|
void write( const std::string& s,const std::string &output ) {
|
||||||
|
indent();
|
||||||
|
file<<output<<std::endl;
|
||||||
|
};
|
||||||
|
void write( const std::string& s,const int16_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint16_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const int32_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint32_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const int64_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint64_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const float output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const double output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const bool output ) { writeInternal(s,output); };
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
template<class T> void writeInternal( const std::string& s,const T output ){
|
||||||
|
indent();
|
||||||
|
file << std::boolalpha << output<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class TextReader : public Reader {
|
||||||
|
private:
|
||||||
|
|
||||||
|
std::ifstream file;
|
||||||
|
int level;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
TextReader(const std::string &_file) : file(_file,std::ios::in) { level = 0;};
|
||||||
|
|
||||||
|
~TextReader() { }
|
||||||
|
|
||||||
|
void read( const std::string& s,std::string &output ) {
|
||||||
|
char c='a';
|
||||||
|
for(int i=0;i<level;i++){
|
||||||
|
file.get(c);
|
||||||
|
if ( c != '\t' )
|
||||||
|
std::cout << "mismatch on tab "<<c<<" level "<< level<< " i "<< i<<std::endl;
|
||||||
|
}
|
||||||
|
output.clear();
|
||||||
|
std::getline(file,output);
|
||||||
|
};
|
||||||
|
void push(const std::string &s) {
|
||||||
|
// std::string tmp; read(s,tmp);
|
||||||
|
level++;
|
||||||
|
}
|
||||||
|
void pop(void) { level--; }
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
void read( const std::string& s, std::vector<T> &output ) {
|
||||||
|
|
||||||
|
push(s);
|
||||||
|
|
||||||
|
uint64_t n; read("N",n);
|
||||||
|
|
||||||
|
// skip the vector length
|
||||||
|
T tmp;
|
||||||
|
output.resize(0);
|
||||||
|
for(int i=0;i<n;i++){
|
||||||
|
std::ostringstream oss; oss << "elem" << i;
|
||||||
|
read(*this,oss.str(),tmp);
|
||||||
|
output.push_back(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
pop();
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
void read( const std::string& s, int16_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint16_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, int32_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint32_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, int64_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint64_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, float &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, double &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, bool &output ) { readInternal(s,output); };
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
template<class T> void readInternal( const std::string& path, T &output ){
|
||||||
|
std::string asString;
|
||||||
|
read(path,asString);
|
||||||
|
convert(asString,output);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> void convert(const std::string &asString,T &output)
|
||||||
|
{
|
||||||
|
std::istringstream is(asString); is.exceptions(std::ios::failbit);
|
||||||
|
try {
|
||||||
|
is >> std::boolalpha >> output;
|
||||||
|
} catch(std::istringstream::failure e) {
|
||||||
|
std::cerr << "XML read failure on "<<" "<<asString<<" "<<typeid(T).name()<<std::endl;
|
||||||
|
}
|
||||||
|
assert( is.tellg()==-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
146
lib/serialisation/XmlIO.h
Normal file
146
lib/serialisation/XmlIO.h
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
#ifndef GRID_SERIALISATION_XML_READER_H
|
||||||
|
#define GRID_SERIALISATION_XML_READER_H
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <math.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "pugixml/pugixml.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
class XMLWriter : public Writer {
|
||||||
|
private:
|
||||||
|
|
||||||
|
pugi::xml_document doc;
|
||||||
|
pugi::xml_node node;
|
||||||
|
std::string file;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
XMLWriter(const std::string &_file) : file(_file)
|
||||||
|
{
|
||||||
|
node=doc.append_child();
|
||||||
|
node.set_name("document");
|
||||||
|
}
|
||||||
|
|
||||||
|
~XMLWriter()
|
||||||
|
{
|
||||||
|
// simple_walker walker;
|
||||||
|
// doc.traverse(walker);
|
||||||
|
|
||||||
|
doc.save_file(file.c_str()," ");
|
||||||
|
}
|
||||||
|
|
||||||
|
void push(const std::string &s)
|
||||||
|
{
|
||||||
|
node = node.append_child(s.c_str());
|
||||||
|
}
|
||||||
|
void pop(void) {
|
||||||
|
node = node.parent();
|
||||||
|
}
|
||||||
|
|
||||||
|
void write( const std::string& s,const std::string &output ) {
|
||||||
|
pugi::xml_node leaf=node.append_child(s.c_str());
|
||||||
|
leaf.append_child(pugi::node_pcdata).set_value(output.c_str());
|
||||||
|
};
|
||||||
|
|
||||||
|
void write( const std::string& s,const int16_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint16_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const int32_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint32_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const int64_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const uint64_t output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const float output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const double output ) { writeInternal(s,output); };
|
||||||
|
void write( const std::string& s,const bool output ) { writeInternal(s,output); };
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
template<class T> void writeInternal( const std::string& s,const T output ){
|
||||||
|
std::ostringstream os;
|
||||||
|
os << std::boolalpha << output;
|
||||||
|
write(s,os.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class XMLReader : public Reader {
|
||||||
|
private:
|
||||||
|
|
||||||
|
pugi::xml_document doc;
|
||||||
|
pugi::xml_node node;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
XMLReader(const std::string &_file)
|
||||||
|
{
|
||||||
|
pugi::xml_parse_result result = doc.load_file(_file.c_str());
|
||||||
|
|
||||||
|
if ( !result ) {
|
||||||
|
std::cout << "XML error description: " << result.description() << "\n";
|
||||||
|
std::cout << "XML error offset: " << result.offset << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(result);
|
||||||
|
|
||||||
|
// simple_walker walker;
|
||||||
|
// doc.traverse(walker);
|
||||||
|
|
||||||
|
node= doc.child("document");
|
||||||
|
}
|
||||||
|
|
||||||
|
~XMLReader() { }
|
||||||
|
|
||||||
|
void read( const std::string& s,std::string &output ) {
|
||||||
|
output=node.child(s.c_str()).first_child().value();
|
||||||
|
};
|
||||||
|
void push(const std::string &s)
|
||||||
|
{
|
||||||
|
node = node.child(s.c_str());
|
||||||
|
}
|
||||||
|
void pop(void) {
|
||||||
|
node = node.parent();
|
||||||
|
}
|
||||||
|
|
||||||
|
void read( const std::string& s, int16_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint16_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, int32_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint32_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, int64_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, uint64_t &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, float &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, double &output ) { readInternal(s,output); };
|
||||||
|
void read( const std::string& s, bool &output ) { readInternal(s,output); };
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
template<class T> void readInternal( const std::string& path, T &output ){
|
||||||
|
std::string asString;
|
||||||
|
read(path,asString);
|
||||||
|
convert(asString,output);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> void convert(const std::string &asString,T &output)
|
||||||
|
{
|
||||||
|
std::istringstream is(asString); is.exceptions(std::ios::failbit);
|
||||||
|
try {
|
||||||
|
is >> std::boolalpha >> output;
|
||||||
|
} catch(std::istringstream::failure e) {
|
||||||
|
std::cerr << "XML read failure on "<<" "<<asString<<" "<<typeid(T).name()<<std::endl;
|
||||||
|
}
|
||||||
|
assert( is.tellg()==-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@ -183,11 +183,11 @@ namespace Optimization {
|
|||||||
// Complex float
|
// Complex float
|
||||||
inline __m256 operator()(__m256 a, __m256 b){
|
inline __m256 operator()(__m256 a, __m256 b){
|
||||||
__m256 ymm0,ymm1,ymm2;
|
__m256 ymm0,ymm1,ymm2;
|
||||||
ymm0 = _mm256_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
ymm0 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
|
||||||
ymm0 = _mm256_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
ymm0 = _mm256_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
// FIXME AVX2 could MAC
|
// FIXME AVX2 could MAC
|
||||||
ymm1 = _mm256_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
ymm1 = _mm256_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi
|
||||||
ymm2 = _mm256_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
ymm2 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai
|
||||||
ymm1 = _mm256_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
ymm1 = _mm256_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
return _mm256_addsub_ps(ymm0,ymm1);
|
return _mm256_addsub_ps(ymm0,ymm1);
|
||||||
}
|
}
|
||||||
@ -270,7 +270,7 @@ namespace Optimization {
|
|||||||
//Complex single
|
//Complex single
|
||||||
inline __m256 operator()(__m256 in, __m256 ret){
|
inline __m256 operator()(__m256 in, __m256 ret){
|
||||||
__m256 tmp =_mm256_addsub_ps(_mm256_setzero_ps(),in); // r,-i
|
__m256 tmp =_mm256_addsub_ps(_mm256_setzero_ps(),in); // r,-i
|
||||||
return _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r
|
return _mm256_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,0,1)); //-i,r
|
||||||
}
|
}
|
||||||
//Complex double
|
//Complex double
|
||||||
inline __m256d operator()(__m256d in, __m256d ret){
|
inline __m256d operator()(__m256d in, __m256d ret){
|
||||||
@ -282,7 +282,7 @@ namespace Optimization {
|
|||||||
struct TimesI{
|
struct TimesI{
|
||||||
//Complex single
|
//Complex single
|
||||||
inline __m256 operator()(__m256 in, __m256 ret){
|
inline __m256 operator()(__m256 in, __m256 ret){
|
||||||
__m256 tmp =_mm256_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1)); // i,r
|
__m256 tmp =_mm256_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // i,r
|
||||||
return _mm256_addsub_ps(_mm256_setzero_ps(),tmp); // i,-r
|
return _mm256_addsub_ps(_mm256_setzero_ps(),tmp); // i,-r
|
||||||
}
|
}
|
||||||
//Complex double
|
//Complex double
|
||||||
@ -296,27 +296,44 @@ namespace Optimization {
|
|||||||
// Some Template specialization
|
// Some Template specialization
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
template < typename vtype >
|
struct Permute{
|
||||||
void permute(vtype &a,vtype b, int perm) {
|
|
||||||
uconv<vtype> conv;
|
static inline __m256 Permute0(__m256 in){
|
||||||
conv.v = b;
|
return _mm256_permute2f128_ps(in,in,0x01);
|
||||||
switch (perm){
|
};
|
||||||
// 8x32 bits=>3 permutes
|
static inline __m256 Permute1(__m256 in){
|
||||||
case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
return _mm256_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||||
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
};
|
||||||
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
|
static inline __m256 Permute2(__m256 in){
|
||||||
default: assert(0); break;
|
return _mm256_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||||
}
|
};
|
||||||
a = conv.v;
|
static inline __m256 Permute3(__m256 in){
|
||||||
}
|
return in;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline __m256d Permute0(__m256d in){
|
||||||
|
return _mm256_permute2f128_pd(in,in,0x01);
|
||||||
|
};
|
||||||
|
static inline __m256d Permute1(__m256d in){
|
||||||
|
return _mm256_shuffle_pd(in,in,0x5);
|
||||||
|
};
|
||||||
|
static inline __m256d Permute2(__m256d in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
static inline __m256d Permute3(__m256d in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){
|
inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){
|
||||||
__m256 v1,v2;
|
__m256 v1,v2;
|
||||||
Optimization::permute(v1,in,0); // avx 256; quad complex single
|
v1=Optimization::Permute::Permute0(in); // avx 256; quad complex single
|
||||||
v1 = _mm256_add_ps(v1,in);
|
v1= _mm256_add_ps(v1,in);
|
||||||
Optimization::permute(v2,v1,1);
|
v2=Optimization::Permute::Permute1(v1);
|
||||||
v1 = _mm256_add_ps(v1,v2);
|
v1 = _mm256_add_ps(v1,v2);
|
||||||
u256f conv; conv.v = v1;
|
u256f conv; conv.v = v1;
|
||||||
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||||
@ -326,11 +343,11 @@ namespace Optimization {
|
|||||||
template<>
|
template<>
|
||||||
inline Grid::RealF Reduce<Grid::RealF, __m256>::operator()(__m256 in){
|
inline Grid::RealF Reduce<Grid::RealF, __m256>::operator()(__m256 in){
|
||||||
__m256 v1,v2;
|
__m256 v1,v2;
|
||||||
Optimization::permute(v1,in,0); // avx 256; octo-double
|
v1 = Optimization::Permute::Permute0(in); // avx 256; octo-double
|
||||||
v1 = _mm256_add_ps(v1,in);
|
v1 = _mm256_add_ps(v1,in);
|
||||||
Optimization::permute(v2,v1,1);
|
v2 = Optimization::Permute::Permute1(v1);
|
||||||
v1 = _mm256_add_ps(v1,v2);
|
v1 = _mm256_add_ps(v1,v2);
|
||||||
Optimization::permute(v2,v1,2);
|
v2 = Optimization::Permute::Permute2(v1);
|
||||||
v1 = _mm256_add_ps(v1,v2);
|
v1 = _mm256_add_ps(v1,v2);
|
||||||
u256f conv; conv.v=v1;
|
u256f conv; conv.v=v1;
|
||||||
return conv.f[0];
|
return conv.f[0];
|
||||||
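The Reduce specialisation above chains Permute0, Permute1 and Permute2 with adds, i.e. a butterfly reduction over the eight 32-bit slots of the register. A scalar model of that pattern (illustration only, no intrinsics):

#include <cstdio>

// Each round swaps halves of progressively smaller width and adds:
// width 4 ~ Permute0 (128-bit lane swap), 2 ~ Permute1, 1 ~ Permute2.
int main(void) {
  double v[8] = {1,2,3,4,5,6,7,8};
  for (int width = 4; width >= 1; width /= 2) {
    double p[8];
    for (int i = 0; i < 8; i++) p[i] = v[i ^ width];
    for (int i = 0; i < 8; i++) v[i] += p[i];
  }
  std::printf("sum = %g\n", v[0]);   // every slot now holds 36
  return 0;
}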
@ -341,7 +358,7 @@ namespace Optimization {
|
|||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexD Reduce<Grid::ComplexD, __m256d>::operator()(__m256d in){
|
inline Grid::ComplexD Reduce<Grid::ComplexD, __m256d>::operator()(__m256d in){
|
||||||
__m256d v1;
|
__m256d v1;
|
||||||
Optimization::permute(v1,in,0); // sse 128; paired complex single
|
v1 = Optimization::Permute::Permute0(in); // sse 128; paired complex single
|
||||||
v1 = _mm256_add_pd(v1,in);
|
v1 = _mm256_add_pd(v1,in);
|
||||||
u256d conv; conv.v = v1;
|
u256d conv; conv.v = v1;
|
||||||
return Grid::ComplexD(conv.f[0],conv.f[1]);
|
return Grid::ComplexD(conv.f[0],conv.f[1]);
|
||||||
@ -351,9 +368,9 @@ namespace Optimization {
|
|||||||
template<>
|
template<>
|
||||||
inline Grid::RealD Reduce<Grid::RealD, __m256d>::operator()(__m256d in){
|
inline Grid::RealD Reduce<Grid::RealD, __m256d>::operator()(__m256d in){
|
||||||
__m256d v1,v2;
|
__m256d v1,v2;
|
||||||
Optimization::permute(v1,in,0); // avx 256; quad double
|
v1 = Optimization::Permute::Permute0(in); // avx 256; quad double
|
||||||
v1 = _mm256_add_pd(v1,in);
|
v1 = _mm256_add_pd(v1,in);
|
||||||
Optimization::permute(v2,v1,1);
|
v2 = Optimization::Permute::Permute1(v1);
|
||||||
v1 = _mm256_add_pd(v1,v2);
|
v1 = _mm256_add_pd(v1,v2);
|
||||||
u256d conv; conv.v = v1;
|
u256d conv; conv.v = v1;
|
||||||
return conv.f[0];
|
return conv.f[0];
|
||||||
@ -387,13 +404,6 @@ namespace Grid {
|
|||||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template < typename VectorSIMD >
|
|
||||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
|
||||||
Optimization::permute(y.v,b.v,perm);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Function name aliases
|
// Function name aliases
|
||||||
typedef Optimization::Vsplat VsplatSIMD;
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
typedef Optimization::Vstore VstoreSIMD;
|
typedef Optimization::Vstore VstoreSIMD;
|
||||||
|
@@ -149,49 +149,33 @@ namespace Optimization {
     }
   };

+  // Note, we can beat the shuf overhead in chain with two temporaries
+  // Ar Ai , Br Bi,  Ai Ar            // one shuf
+  //tmpr Ar Br,  Ai Bi                // Mul/Mac/Mac
+  //tmpi Br Ai,  Bi Ar                // Mul/Mac/Mac
+  // add tmpi,shuf(tmpi)
+  // sub tmpr,shuf(tmpi)
+  // shuf(tmpr,tmpi).                 // Could drop/trade for write mask
+
+  // Gives
+  //  2mul,4 mac +add+sub = 8 flop type insns
+  //  3shuf + 2 (+shuf)   = 5/6 simd perm and 1/2 the load.
+
   struct MultComplex{
     // Complex float
     inline __m512 operator()(__m512 a, __m512 b){
-      __m512 vzero,ymm0,ymm1,real, imag;
-      vzero = _mm512_setzero_ps();
-      ymm0  = _mm512_swizzle_ps(a, _MM_SWIZ_REG_CDAB); //
-      real  = (__m512)_mm512_mask_or_epi32((__m512i)a, 0xAAAA,(__m512i)vzero,(__m512i)ymm0);
-      imag  = _mm512_mask_sub_ps(a, 0x5555,vzero, ymm0);
-      ymm1  = _mm512_mul_ps(real, b);
-      ymm0  = _mm512_swizzle_ps(b, _MM_SWIZ_REG_CDAB); // OK
-      return _mm512_fmadd_ps(ymm0,imag,ymm1);
+      // dup, dup, perm, mul, madd
+      __m512 a_real = _mm512_moveldup_ps( a ); // Ar Ar
+      __m512 a_imag = _mm512_movehdup_ps( a ); // Ai Ai
+      a_imag = _mm512_mul_ps( a_imag, _mm512_permute_ps( b, 0xB1 ) ); // (Ai, Ai) * (Bi, Br) = Ai Bi, Ai Br
+      return _mm512_fmaddsub_ps( a_real, b, a_imag ); // Ar Br , Ar Bi +- Ai Bi = ArBr-AiBi , ArBi+AiBr
     }
     // Complex double
     inline __m512d operator()(__m512d a, __m512d b){
-      /* This is from
-       * Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets
-       * @inproceedings{McFarlin:2011:ASV:1995896.1995938,
-       * author = {McFarlin, Daniel S. and Arbatov, Volodymyr and Franchetti, Franz and P\"{u}schel, Markus},
-       * title = {Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets},
-       * booktitle = {Proceedings of the International Conference on Supercomputing},
-       * series = {ICS '11},
-       * year = {2011},
-       * isbn = {978-1-4503-0102-2},
-       * location = {Tucson, Arizona, USA},
-       * pages = {265--274},
-       * numpages = {10},
-       * url = {http://doi.acm.org/10.1145/1995896.1995938},
-       * doi = {10.1145/1995896.1995938},
-       * acmid = {1995938},
-       * publisher = {ACM},
-       * address = {New York, NY, USA},
-       * keywords = {autovectorization, fourier transform, program generation, simd, super-optimization},
-       * }
-       */
-      __m512d vzero,ymm0,ymm1,real,imag;
-      vzero =_mm512_setzero_pd();
-      ymm0  = _mm512_swizzle_pd(a, _MM_SWIZ_REG_CDAB); //
-      real  =(__m512d)_mm512_mask_or_epi64((__m512i)a, 0xAA,(__m512i)vzero,(__m512i) ymm0);
-      imag  = _mm512_mask_sub_pd(a, 0x55,vzero, ymm0);
-      ymm1  = _mm512_mul_pd(real, b);
-      ymm0  = _mm512_swizzle_pd(b, _MM_SWIZ_REG_CDAB); // OK
-      return _mm512_fmadd_pd(ymm0,imag,ymm1);
+      __m512d a_real = _mm512_shuffle_pd( a, a, 0x00 );
+      __m512d a_imag = _mm512_shuffle_pd( a, a, 0xFF );
+      a_imag = _mm512_mul_pd( a_imag, _mm512_permute_pd( b, 0x55 ) );
+      return _mm512_fmaddsub_pd( a_real, b, a_imag );
     }
   };
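The new complex multiply replaces the swizzle/mask sequence with the dup, dup, permute, mul, fmaddsub idiom. A self-contained sketch of the same idiom for four packed single-precision complex numbers (re,im interleaved) in an AVX register; this is an illustration using AVX2+FMA intrinsics, not Grid's AVX512 code path:

#include <immintrin.h>

// (Ar+iAi)*(Br+iBi): even lanes end up with ArBr-AiBi, odd lanes with ArBi+AiBr.
static inline __m256 mult_complex_f(__m256 a, __m256 b) {
  __m256 a_real = _mm256_moveldup_ps(a);          // Ar Ar ...
  __m256 a_imag = _mm256_movehdup_ps(a);          // Ai Ai ...
  __m256 b_swap = _mm256_permute_ps(b, 0xB1);     // Bi Br ...
  __m256 t      = _mm256_mul_ps(a_imag, b_swap);  // AiBi , AiBr ...
  return _mm256_fmaddsub_ps(a_real, b, t);        // ArBr-AiBi , ArBi+AiBr
}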
@@ -227,27 +211,25 @@ namespace Optimization {
     //Complex single
     inline __m512 operator()(__m512 in, __m512 ret){
       __m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag
-      return _mm512_swizzle_ps(tmp, _MM_SWIZ_REG_CDAB);// OK
+      return _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(1,0,3,2)); // 0x4E??
     }
     //Complex double
     inline __m512d operator()(__m512d in, __m512d ret){
       __m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag
-      return _mm512_swizzle_pd(tmp, _MM_SWIZ_REG_CDAB);// OK
+      return _mm512_shuffle_pd(tmp,tmp,0x55);
     }

   };

   struct TimesI{
     //Complex single
     inline __m512 operator()(__m512 in, __m512 ret){
-      __m512 tmp = _mm512_swizzle_ps(in, _MM_SWIZ_REG_CDAB);// OK
-      return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp); // real -imag
+      __m512 tmp = _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(1,0,3,2));
+      return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp);
     }
     //Complex double
     inline __m512d operator()(__m512d in, __m512d ret){
-      __m512d tmp = _mm512_swizzle_pd(in, _MM_SWIZ_REG_CDAB);// OK
-      return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp); // real -imag
+      __m512d tmp = _mm512_shuffle_pd(tmp,tmp,0x55);
+      return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp);
     }

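A scalar reference for what TimesI computes per packed complex element (TimesMinusI is the same with the opposite sign) can be handy when unit-testing the vector functors; the helper below is an illustrative stand-in, not Grid API:

#include <complex>
#include <cstddef>
#include <vector>

// i*(a+ib) = -b + ia : negate the imaginary part and move it into the real slot.
std::vector<std::complex<double> > times_i(const std::vector<std::complex<double> > &in) {
  std::vector<std::complex<double> > out(in.size());
  for (std::size_t k = 0; k < in.size(); k++)
    out[k] = std::complex<double>(-in[k].imag(), in[k].real());
  return out;
}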
@@ -255,6 +237,36 @@ namespace Optimization {


+  // Gpermute utilities consider coalescing into 1 Gpermute
+  struct Permute{
+
+    static inline __m512 Permute0(__m512 in){
+      return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
+    };
+    static inline __m512 Permute1(__m512 in){
+      return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
+    };
+    static inline __m512 Permute2(__m512 in){
+      return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
+    };
+    static inline __m512 Permute3(__m512 in){
+      return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
+    };
+
+    static inline __m512d Permute0(__m512d in){
+      return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
+    };
+    static inline __m512d Permute1(__m512d in){
+      return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
+    };
+    static inline __m512d Permute2(__m512d in){
+      return _mm512_shuffle_pd(in,in,0x55);
+    };
+    static inline __m512d Permute3(__m512d in){
+      return in;
+    };
+
+  };
+
   //////////////////////////////////////////////
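The four Permute levels swap blocks of the vector at successively finer granularity: for an __m512 of floats, Permute0 exchanges the two 256-bit halves, Permute1 the 128-bit quarters, Permute2 the 64-bit pairs and Permute3 adjacent 32-bit words; for doubles the ladder bottoms out one level earlier, which is why Permute3(__m512d) is the identity. A scalar model of that indexing, valid for every level coarser than one element (names are illustrative, not Grid API):

#include <array>
#include <cstddef>

// PermuteN on W words swaps blocks of W/2^(N+1) elements: the partner of
// index i differs from i in exactly one block-sized bit.
template <typename T, std::size_t W>
std::array<T, W> permuteN(const std::array<T, W> &in, int N) {
  std::array<T, W> out;
  std::size_t block = W >> (N + 1);   // e.g. W=16 floats: 8, 4, 2, 1 for N=0..3
  for (std::size_t i = 0; i < W; i++)
    out[i] = in[i ^ block];
  return out;
}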
@@ -314,25 +326,6 @@ namespace Grid {
     }

-    // Gpermute utilities consider coalescing into 1 Gpermute
-    template < typename VectorSIMD >
-    inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
-      union {
-        __m512 f;
-        decltype(VectorSIMD::v) v;
-      } conv;
-      conv.v = b.v;
-      switch(perm){
-      case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
-      case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
-      case 1 : conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
-      case 0 : conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
-      default: assert(0); break;
-      }
-      y.v=conv.v;
-    };

     // Function name aliases
     typedef Optimization::Vsplat   VsplatSIMD;
lib/simd/Grid_imci.h (new file, 365 lines)
@@ -0,0 +1,365 @@
|
//----------------------------------------------------------------------
|
||||||
|
/*! @file Grid_knc.h
|
||||||
|
@brief Optimization libraries for AVX512 instructions set for KNC
|
||||||
|
|
||||||
|
Using intrinsics
|
||||||
|
*/
|
||||||
|
// Time-stamp: <2015-06-09 14:27:28 neo>
|
||||||
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
//#ifndef KNC_ONLY_STORES
|
||||||
|
//#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
|
||||||
|
//#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
|
||||||
|
//#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace Optimization {
|
||||||
|
|
||||||
|
struct Vsplat{
|
||||||
|
//Complex float
|
||||||
|
inline __m512 operator()(float a, float b){
|
||||||
|
return _mm512_set_ps(b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a);
|
||||||
|
}
|
||||||
|
// Real float
|
||||||
|
inline __m512 operator()(float a){
|
||||||
|
return _mm512_set1_ps(a);
|
||||||
|
}
|
||||||
|
//Complex double
|
||||||
|
inline __m512d operator()(double a, double b){
|
||||||
|
return _mm512_set_pd(b,a,b,a,b,a,b,a);
|
||||||
|
}
|
||||||
|
//Real double
|
||||||
|
inline __m512d operator()(double a){
|
||||||
|
return _mm512_set1_pd(a);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline __m512i operator()(Integer a){
|
||||||
|
return _mm512_set1_epi32(a);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Vstore{
|
||||||
|
//Float
|
||||||
|
inline void operator()(__m512 a, float* F){
|
||||||
|
_mm512_store_ps(F,a);
|
||||||
|
}
|
||||||
|
//Double
|
||||||
|
inline void operator()(__m512d a, double* D){
|
||||||
|
_mm512_store_pd(D,a);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline void operator()(__m512i a, Integer* I){
|
||||||
|
_mm512_store_si512((__m512i *)I,a);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Vstream{
|
||||||
|
//Float
|
||||||
|
inline void operator()(float * a, __m512 b){
|
||||||
|
_mm512_storenrngo_ps(a,b);
|
||||||
|
}
|
||||||
|
//Double
|
||||||
|
inline void operator()(double * a, __m512d b){
|
||||||
|
_mm512_storenrngo_pd(a,b);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
struct Vset{
|
||||||
|
// Complex float
|
||||||
|
inline __m512 operator()(Grid::ComplexF *a){
|
||||||
|
return _mm512_set_ps(a[7].imag(),a[7].real(),a[6].imag(),a[6].real(),
|
||||||
|
a[5].imag(),a[5].real(),a[4].imag(),a[4].real(),
|
||||||
|
a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),
|
||||||
|
a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline __m512d operator()(Grid::ComplexD *a){
|
||||||
|
return _mm512_set_pd(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),
|
||||||
|
a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||||
|
}
|
||||||
|
// Real float
|
||||||
|
inline __m512 operator()(float *a){
|
||||||
|
return _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
||||||
|
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||||
|
}
|
||||||
|
// Real double
|
||||||
|
inline __m512d operator()(double *a){
|
||||||
|
return _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||||
|
}
|
||||||
|
// Integer
|
||||||
|
inline __m512i operator()(Integer *a){
|
||||||
|
return _mm512_set_epi32( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
||||||
|
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Out_type, typename In_type>
|
||||||
|
struct Reduce{
|
||||||
|
//Need templated class to overload output type
|
||||||
|
//General form must generate error if compiled
|
||||||
|
inline Out_type operator()(In_type in){
|
||||||
|
printf("Error, using wrong Reduce function\n");
|
||||||
|
exit(1);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Arithmetic operations
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
struct Sum{
|
||||||
|
//Complex/Real float
|
||||||
|
inline __m512 operator()(__m512 a, __m512 b){
|
||||||
|
return _mm512_add_ps(a,b);
|
||||||
|
}
|
||||||
|
//Complex/Real double
|
||||||
|
inline __m512d operator()(__m512d a, __m512d b){
|
||||||
|
return _mm512_add_pd(a,b);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline __m512i operator()(__m512i a, __m512i b){
|
||||||
|
return _mm512_add_epi32(a,b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Sub{
|
||||||
|
//Complex/Real float
|
||||||
|
inline __m512 operator()(__m512 a, __m512 b){
|
||||||
|
return _mm512_sub_ps(a,b);
|
||||||
|
}
|
||||||
|
//Complex/Real double
|
||||||
|
inline __m512d operator()(__m512d a, __m512d b){
|
||||||
|
return _mm512_sub_pd(a,b);
|
||||||
|
}
|
||||||
|
//Integer
|
||||||
|
inline __m512i operator()(__m512i a, __m512i b){
|
||||||
|
return _mm512_sub_epi32(a,b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct MultComplex{
|
||||||
|
// Complex float
|
||||||
|
inline __m512 operator()(__m512 a, __m512 b){
|
||||||
|
__m512 vzero,ymm0,ymm1,real, imag;
|
||||||
|
vzero = _mm512_setzero_ps();
|
||||||
|
ymm0 = _mm512_swizzle_ps(a, _MM_SWIZ_REG_CDAB); //
|
||||||
|
real = (__m512)_mm512_mask_or_epi32((__m512i)a, 0xAAAA,(__m512i)vzero,(__m512i)ymm0);
|
||||||
|
imag = _mm512_mask_sub_ps(a, 0x5555,vzero, ymm0);
|
||||||
|
ymm1 = _mm512_mul_ps(real, b);
|
||||||
|
ymm0 = _mm512_swizzle_ps(b, _MM_SWIZ_REG_CDAB); // OK
|
||||||
|
return _mm512_fmadd_ps(ymm0,imag,ymm1);
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline __m512d operator()(__m512d a, __m512d b){
|
||||||
|
/* This is from
|
||||||
|
* Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets
|
||||||
|
* @inproceedings{McFarlin:2011:ASV:1995896.1995938,
|
||||||
|
* author = {McFarlin, Daniel S. and Arbatov, Volodymyr and Franchetti, Franz and P\"{u}schel, Markus},
|
||||||
|
* title = {Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets},
|
||||||
|
* booktitle = {Proceedings of the International Conference on Supercomputing},
|
||||||
|
* series = {ICS '11},
|
||||||
|
* year = {2011},
|
||||||
|
* isbn = {978-1-4503-0102-2},
|
||||||
|
* location = {Tucson, Arizona, USA},
|
||||||
|
* pages = {265--274},
|
||||||
|
* numpages = {10},
|
||||||
|
* url = {http://doi.acm.org/10.1145/1995896.1995938},
|
||||||
|
* doi = {10.1145/1995896.1995938},
|
||||||
|
* acmid = {1995938},
|
||||||
|
* publisher = {ACM},
|
||||||
|
* address = {New York, NY, USA},
|
||||||
|
* keywords = {autovectorization, fourier transform, program generation, simd, super-optimization},
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
__m512d vzero,ymm0,ymm1,real,imag;
|
||||||
|
vzero =_mm512_setzero_pd();
|
||||||
|
ymm0 = _mm512_swizzle_pd(a, _MM_SWIZ_REG_CDAB); //
|
||||||
|
real =(__m512d)_mm512_mask_or_epi64((__m512i)a, 0xAA,(__m512i)vzero,(__m512i) ymm0);
|
||||||
|
imag = _mm512_mask_sub_pd(a, 0x55,vzero, ymm0);
|
||||||
|
ymm1 = _mm512_mul_pd(real, b);
|
||||||
|
ymm0 = _mm512_swizzle_pd(b, _MM_SWIZ_REG_CDAB); // OK
|
||||||
|
return _mm512_fmadd_pd(ymm0,imag,ymm1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Mult{
|
||||||
|
// Real float
|
||||||
|
inline __m512 operator()(__m512 a, __m512 b){
|
||||||
|
return _mm512_mul_ps(a,b);
|
||||||
|
}
|
||||||
|
// Real double
|
||||||
|
inline __m512d operator()(__m512d a, __m512d b){
|
||||||
|
return _mm512_mul_pd(a,b);
|
||||||
|
}
|
||||||
|
// Integer
|
||||||
|
inline __m512i operator()(__m512i a, __m512i b){
|
||||||
|
return _mm512_mullo_epi32(a,b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Conj{
|
||||||
|
// Complex single
|
||||||
|
inline __m512 operator()(__m512 in){
|
||||||
|
return _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // Zero out 0+real 0-imag
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline __m512d operator()(__m512d in){
|
||||||
|
return _mm512_mask_sub_pd(in, 0xaa,_mm512_setzero_pd(), in);
|
||||||
|
}
|
||||||
|
// do not define for integer input
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TimesMinusI{
|
||||||
|
//Complex single
|
||||||
|
inline __m512 operator()(__m512 in, __m512 ret){
|
||||||
|
__m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag
|
||||||
|
return _mm512_swizzle_ps(tmp, _MM_SWIZ_REG_CDAB);// OK
|
||||||
|
}
|
||||||
|
//Complex double
|
||||||
|
inline __m512d operator()(__m512d in, __m512d ret){
|
||||||
|
__m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag
|
||||||
|
return _mm512_swizzle_pd(tmp, _MM_SWIZ_REG_CDAB);// OK
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TimesI{
|
||||||
|
//Complex single
|
||||||
|
inline __m512 operator()(__m512 in, __m512 ret){
|
||||||
|
__m512 tmp = _mm512_swizzle_ps(in, _MM_SWIZ_REG_CDAB);// OK
|
||||||
|
return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp); // real -imag
|
||||||
|
}
|
||||||
|
//Complex double
|
||||||
|
inline __m512d operator()(__m512d in, __m512d ret){
|
||||||
|
__m512d tmp = _mm512_swizzle_pd(in, _MM_SWIZ_REG_CDAB);// OK
|
||||||
|
return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp); // real -imag
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Permute{
|
||||||
|
|
||||||
|
static inline __m512 Permute0(__m512 in){
|
||||||
|
return _mm512_permute4f128_ps(in,(_MM_PERM_ENUM)_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||||
|
};
|
||||||
|
static inline __m512 Permute1(__m512 in){
|
||||||
|
return _mm512_permute4f128_ps(in,(_MM_PERM_ENUM)_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||||
|
};
|
||||||
|
static inline __m512 Permute2(__m512 in){
|
||||||
|
return _mm512_swizzle_ps(in,_MM_SWIZ_REG_BADC);
|
||||||
|
};
|
||||||
|
static inline __m512 Permute3(__m512 in){
|
||||||
|
return _mm512_swizzle_ps(in,_MM_SWIZ_REG_CDAB);
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline __m512d Permute0(__m512d in){// Hack no intrinsic for 256 swaps of __m512d
|
||||||
|
return (__m512d)_mm512_permute4f128_ps((__m512)in,(_MM_PERM_ENUM)_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||||
|
};
|
||||||
|
static inline __m512d Permute1(__m512d in){
|
||||||
|
return _mm512_swizzle_pd(in,_MM_SWIZ_REG_BADC);
|
||||||
|
};
|
||||||
|
static inline __m512d Permute2(__m512d in){
|
||||||
|
return _mm512_swizzle_pd(in,_MM_SWIZ_REG_CDAB);
|
||||||
|
};
|
||||||
|
static inline __m512d Permute3(__m512d in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
// Some Template specialization
|
||||||
|
|
||||||
|
//Complex float Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
||||||
|
return Grid::ComplexF(_mm512_mask_reduce_add_ps(0x5555, in),_mm512_mask_reduce_add_ps(0xAAAA, in));
|
||||||
|
}
|
||||||
|
//Real float Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::RealF Reduce<Grid::RealF, __m512>::operator()(__m512 in){
|
||||||
|
return _mm512_reduce_add_ps(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Complex double Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
|
||||||
|
return Grid::ComplexD(_mm512_mask_reduce_add_pd(0x55, in),_mm512_mask_reduce_add_pd(0xAA, in));
|
||||||
|
}
|
||||||
|
|
||||||
|
//Real double Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::RealD Reduce<Grid::RealD, __m512d>::operator()(__m512d in){
|
||||||
|
return _mm512_reduce_add_pd(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Integer Reduce
|
||||||
|
template<>
|
||||||
|
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
|
||||||
|
// FIXME unimplemented
|
||||||
|
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Here assign types
|
||||||
|
namespace Grid {
|
||||||
|
typedef __m512 SIMD_Ftype; // Single precision type
|
||||||
|
typedef __m512d SIMD_Dtype; // Double precision type
|
||||||
|
typedef __m512i SIMD_Itype; // Integer type
|
||||||
|
|
||||||
|
// prefecth
|
||||||
|
inline void v_prefetch0(int size, const char *ptr){
|
||||||
|
for(int i=0;i<size;i+=64){ // Define L1 linesize above
|
||||||
|
_mm_prefetch(ptr+i+4096,_MM_HINT_T1);
|
||||||
|
_mm_prefetch(ptr+i+512,_MM_HINT_T0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline void prefetch_HINT_T0(const char *ptr){
|
||||||
|
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Function name aliases
|
||||||
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
|
typedef Optimization::Vstore VstoreSIMD;
|
||||||
|
typedef Optimization::Vset VsetSIMD;
|
||||||
|
typedef Optimization::Vstream VstreamSIMD;
|
||||||
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||||
|
|
||||||
|
|
||||||
|
// Arithmetic operations
|
||||||
|
typedef Optimization::Sum SumSIMD;
|
||||||
|
typedef Optimization::Sub SubSIMD;
|
||||||
|
typedef Optimization::Mult MultSIMD;
|
||||||
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::Conj ConjSIMD;
|
||||||
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
|
}
|
@ -151,10 +151,10 @@ namespace Optimization {
|
|||||||
// Complex float
|
// Complex float
|
||||||
inline __m128 operator()(__m128 a, __m128 b){
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
__m128 ymm0,ymm1,ymm2;
|
__m128 ymm0,ymm1,ymm2;
|
||||||
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
|
||||||
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
ymm1 = _mm_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi
|
||||||
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
ymm2 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai
|
||||||
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
return _mm_addsub_ps(ymm0,ymm1);
|
return _mm_addsub_ps(ymm0,ymm1);
|
||||||
}
|
}
|
||||||
@ -201,7 +201,7 @@ namespace Optimization {
|
|||||||
//Complex single
|
//Complex single
|
||||||
inline __m128 operator()(__m128 in, __m128 ret){
|
inline __m128 operator()(__m128 in, __m128 ret){
|
||||||
__m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i
|
__m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i
|
||||||
return _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
return _mm_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||||
}
|
}
|
||||||
//Complex double
|
//Complex double
|
||||||
inline __m128d operator()(__m128d in, __m128d ret){
|
inline __m128d operator()(__m128d in, __m128d ret){
|
||||||
@ -215,7 +215,7 @@ namespace Optimization {
|
|||||||
struct TimesI{
|
struct TimesI{
|
||||||
//Complex single
|
//Complex single
|
||||||
inline __m128 operator()(__m128 in, __m128 ret){
|
inline __m128 operator()(__m128 in, __m128 ret){
|
||||||
__m128 tmp =_mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1));
|
__m128 tmp =_mm_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||||
return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i
|
return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i
|
||||||
}
|
}
|
||||||
//Complex double
|
//Complex double
|
||||||
@ -225,27 +225,45 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Permute{
|
||||||
|
|
||||||
|
static inline __m128 Permute0(__m128 in){
|
||||||
|
return _mm_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||||
|
};
|
||||||
|
static inline __m128 Permute1(__m128 in){
|
||||||
|
return _mm_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||||
|
};
|
||||||
|
static inline __m128 Permute2(__m128 in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
static inline __m128 Permute3(__m128 in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline __m128d Permute0(__m128d in){
|
||||||
|
return _mm_shuffle_pd(in,in,0x1);
|
||||||
|
};
|
||||||
|
static inline __m128d Permute1(__m128d in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
static inline __m128d Permute2(__m128d in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
static inline __m128d Permute3(__m128d in){
|
||||||
|
return in;
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
// Some Template specialization
|
// Some Template specialization
|
||||||
template < typename vtype >
|
|
||||||
void permute(vtype &a, vtype b, int perm) {
|
|
||||||
uconv<vtype> conv;
|
|
||||||
conv.v = b;
|
|
||||||
switch(perm){
|
|
||||||
case 3: break; //empty for SSE4
|
|
||||||
case 2: break; //empty for SSE4
|
|
||||||
case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
|
||||||
case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
|
||||||
default: assert(0); break;
|
|
||||||
}
|
|
||||||
a=conv.v;
|
|
||||||
};
|
|
||||||
|
|
||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m128>::operator()(__m128 in){
|
inline Grid::ComplexF Reduce<Grid::ComplexF, __m128>::operator()(__m128 in){
|
||||||
__m128 v1; // two complex
|
__m128 v1; // two complex
|
||||||
Optimization::permute(v1,in,0);
|
v1= Optimization::Permute::Permute0(in);
|
||||||
v1= _mm_add_ps(v1,in);
|
v1= _mm_add_ps(v1,in);
|
||||||
u128f conv; conv.v=v1;
|
u128f conv; conv.v=v1;
|
||||||
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||||
@ -254,9 +272,9 @@ namespace Optimization {
|
|||||||
template<>
|
template<>
|
||||||
inline Grid::RealF Reduce<Grid::RealF, __m128>::operator()(__m128 in){
|
inline Grid::RealF Reduce<Grid::RealF, __m128>::operator()(__m128 in){
|
||||||
__m128 v1,v2; // quad single
|
__m128 v1,v2; // quad single
|
||||||
Optimization::permute(v1,in,0);
|
v1= Optimization::Permute::Permute0(in);
|
||||||
v1= _mm_add_ps(v1,in);
|
v1= _mm_add_ps(v1,in);
|
||||||
Optimization::permute(v2,v1,1);
|
v2= Optimization::Permute::Permute1(v1);
|
||||||
v1 = _mm_add_ps(v1,v2);
|
v1 = _mm_add_ps(v1,v2);
|
||||||
u128f conv; conv.v=v1;
|
u128f conv; conv.v=v1;
|
||||||
return conv.f[0];
|
return conv.f[0];
|
||||||
@ -274,7 +292,7 @@ namespace Optimization {
|
|||||||
template<>
|
template<>
|
||||||
inline Grid::RealD Reduce<Grid::RealD, __m128d>::operator()(__m128d in){
|
inline Grid::RealD Reduce<Grid::RealD, __m128d>::operator()(__m128d in){
|
||||||
__m128d v1;
|
__m128d v1;
|
||||||
Optimization::permute(v1,in,0); // avx 256; quad double
|
v1 = Optimization::Permute::Permute0(in);
|
||||||
v1 = _mm_add_pd(v1,in);
|
v1 = _mm_add_pd(v1,in);
|
||||||
u128d conv; conv.v = v1;
|
u128d conv; conv.v = v1;
|
||||||
return conv.f[0];
|
return conv.f[0];
|
||||||
@ -302,14 +320,6 @@ namespace Grid {
|
|||||||
inline void prefetch_HINT_T0(const char *ptr){
|
inline void prefetch_HINT_T0(const char *ptr){
|
||||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Gpermute function
|
|
||||||
template < typename VectorSIMD >
|
|
||||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
|
||||||
Optimization::permute(y.v,b.v,perm);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Function name aliases
|
// Function name aliases
|
||||||
typedef Optimization::Vsplat VsplatSIMD;
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
|
@@ -19,6 +19,9 @@
 #if defined AVX512
 #include "Grid_avx512.h"
 #endif
+#if defined IMCI
+#include "Grid_imci.h"
+#endif
 #if defined QPX
 #include "Grid_qpx.h"
 #endif
|
|||||||
// all subtypes; may not be a good assumption, but could
|
// all subtypes; may not be a good assumption, but could
|
||||||
// add the vector width as a template param for BG/Q for example
|
// add the vector width as a template param for BG/Q for example
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
|
friend inline void permute0(Grid_simd &y,Grid_simd b){
|
||||||
|
y.v = Optimization::Permute::Permute0(b.v);
|
||||||
|
}
|
||||||
|
friend inline void permute1(Grid_simd &y,Grid_simd b){
|
||||||
|
y.v = Optimization::Permute::Permute1(b.v);
|
||||||
|
}
|
||||||
|
friend inline void permute2(Grid_simd &y,Grid_simd b){
|
||||||
|
y.v = Optimization::Permute::Permute2(b.v);
|
||||||
|
}
|
||||||
|
friend inline void permute3(Grid_simd &y,Grid_simd b){
|
||||||
|
y.v = Optimization::Permute::Permute3(b.v);
|
||||||
|
}
|
||||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
||||||
{
|
{
|
||||||
Gpermute<Grid_simd>(y,b,perm);
|
if (perm==3) permute3(y,b);
|
||||||
|
else if (perm==2) permute2(y,b);
|
||||||
|
else if (perm==1) permute1(y,b);
|
||||||
|
else if (perm==0) permute0(y,b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
};// end of Grid_simd class definition
|
};// end of Grid_simd class definition
|
||||||
|
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Splat
|
// Splat
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
|
||||||
// this is only for the complex version
|
// this is only for the complex version
|
||||||
template <class S, class V, IfComplex<S> =0, class ABtype>
|
template <class S, class V, IfComplex<S> =0, class ABtype>
|
||||||
inline void vsplat(Grid_simd<S,V> &ret,ABtype a, ABtype b){
|
inline void vsplat(Grid_simd<S,V> &ret,ABtype a, ABtype b){
|
||||||
ret.v = binary<V>(a, b, VsplatSIMD());
|
ret.v = binary<V>(a, b, VsplatSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
// overload if complex
|
// overload if complex
|
||||||
template <class S,class V> inline void vsplat(Grid_simd<S,V> &ret, EnableIf<is_complex < S >, S> c) {
|
template <class S,class V> inline void vsplat(Grid_simd<S,V> &ret, EnableIf<is_complex < S >, S> c) {
|
||||||
Real a = real(c);
|
vsplat(ret,real(c),imag(c));
|
||||||
Real b = imag(c);
|
|
||||||
vsplat(ret,a,b);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//if real fill with a, if complex fill with a in the real part (first function above)
|
//if real fill with a, if complex fill with a in the real part (first function above)
|
||||||
@ -290,8 +306,8 @@ namespace Grid {
|
|||||||
template <class S,class V, IfComplex<S> = 0 > inline void vcomplex_i(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0,1.0));}
|
template <class S,class V, IfComplex<S> = 0 > inline void vcomplex_i(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0,1.0));}
|
||||||
|
|
||||||
// if not complex overload here
|
// if not complex overload here
|
||||||
template <class S,class V, IfReal<S> = 0 > inline void vone (Grid_simd<S,V> &ret){ vsplat(ret,1.0); }
|
template <class S,class V, IfReal<S> = 0 > inline void vone (Grid_simd<S,V> &ret){ vsplat(ret,S(1.0)); }
|
||||||
template <class S,class V, IfReal<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) { vsplat(ret,0.0); }
|
template <class S,class V, IfReal<S> = 0 > inline void vzero(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0)); }
|
||||||
|
|
||||||
// For integral types
|
// For integral types
|
||||||
template <class S,class V,IfInteger<S> = 0 > inline void vone(Grid_simd<S,V> &ret) {vsplat(ret,1); }
|
template <class S,class V,IfInteger<S> = 0 > inline void vone(Grid_simd<S,V> &ret) {vsplat(ret,1); }
|
||||||
@ -304,13 +320,18 @@ namespace Grid {
|
|||||||
///////////////////////
|
///////////////////////
|
||||||
// Vstream
|
// Vstream
|
||||||
///////////////////////
|
///////////////////////
|
||||||
template <class S,class V, IfNotInteger<S> = 0 >
|
template <class S,class V, IfReal<S> = 0 >
|
||||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||||
binary<void>((Real*)&out.v, in.v, VstreamSIMD());
|
binary<void>((S *)&out.v, in.v, VstreamSIMD());
|
||||||
}
|
}
|
||||||
|
template <class S,class V, IfComplex<S> = 0 >
|
||||||
|
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||||
|
typedef typename S::value_type T;
|
||||||
|
binary<void>((T *)&out.v, in.v, VstreamSIMD());
|
||||||
|
}
|
||||||
|
|
||||||
template <class S,class V, IfInteger<S> = 0 >
|
template <class S,class V, IfInteger<S> = 0 >
|
||||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||||
out=in;
|
out=in;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
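The point of Vstream, which the new complex overload now also reaches, is a non-temporal store: write whole registers straight to memory without polluting the cache, which pays off for large lattice-sized writes that will not be re-read soon. A minimal sketch of the underlying idea with plain AVX intrinsics (not Grid's wrapper):

#include <immintrin.h>
#include <cstddef>

// Copy n floats (n a multiple of 8, both pointers 32-byte aligned) with
// streaming stores; the fence orders them before later dependent loads.
static inline void stream_copy(float *dst, const float *src, std::size_t n) {
  for (std::size_t i = 0; i < n; i += 8)
    _mm256_stream_ps(dst + i, _mm256_load_ps(src + i));
  _mm_sfence();
}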
@ -23,6 +23,7 @@ template<class vtype> class iScalar
|
|||||||
public:
|
public:
|
||||||
vtype _internal;
|
vtype _internal;
|
||||||
|
|
||||||
|
typedef vtype element;
|
||||||
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
||||||
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
||||||
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
||||||
@ -124,6 +125,7 @@ template<class vtype,int N> class iVector
|
|||||||
public:
|
public:
|
||||||
vtype _internal[N];
|
vtype _internal[N];
|
||||||
|
|
||||||
|
typedef vtype element;
|
||||||
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
||||||
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
||||||
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
||||||
@ -175,6 +177,7 @@ public:
|
|||||||
permute(out._internal[i],in._internal[i],permutetype);
|
permute(out._internal[i],in._internal[i],permutetype);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unary negation
|
// Unary negation
|
||||||
friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
|
friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
|
||||||
iVector<vtype,N> ret;
|
iVector<vtype,N> ret;
|
||||||
@ -219,6 +222,7 @@ template<class vtype,int N> class iMatrix
|
|||||||
public:
|
public:
|
||||||
vtype _internal[N][N];
|
vtype _internal[N][N];
|
||||||
|
|
||||||
|
typedef vtype element;
|
||||||
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
||||||
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
||||||
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
||||||
@ -287,12 +291,15 @@ public:
|
|||||||
vstream(out._internal[i][j],in._internal[i][j]);
|
vstream(out._internal[i][j],in._internal[i][j]);
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
friend strong_inline void permute(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int permutetype){
|
friend strong_inline void permute(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int permutetype){
|
||||||
for(int i=0;i<N;i++){
|
for(int i=0;i<N;i++){
|
||||||
for(int j=0;j<N;j++){
|
for(int j=0;j<N;j++){
|
||||||
permute(out._internal[i][j],in._internal[i][j],permutetype);
|
permute(out._internal[i][j],in._internal[i][j],permutetype);
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Unary negation
|
// Unary negation
|
||||||
friend strong_inline iMatrix<vtype,N> operator -(const iMatrix<vtype,N> &r) {
|
friend strong_inline iMatrix<vtype,N> operator -(const iMatrix<vtype,N> &r) {
|
||||||
iMatrix<vtype,N> ret;
|
iMatrix<vtype,N> ret;
|
||||||
|
@ -35,19 +35,22 @@ icpc-avx-openmp-mpi)
|
|||||||
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
||||||
;;
|
;;
|
||||||
icpc-avx-openmp)
|
icpc-avx-openmp)
|
||||||
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LIBS="-fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
CXX=icpc ../../configure --enable-precision=single --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LIBS="-fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
||||||
;;
|
;;
|
||||||
icpc-avx2)
|
icpc-avx2)
|
||||||
CXX=icpc ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
CXX=icpc ../../configure --enable-simd=AVX2 CXXFLAGS="-march=core-avx2 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||||
;;
|
;;
|
||||||
icpc-avx512)
|
icpc-avx512)
|
||||||
CXX=icpc ../../configure --enable-simd=AVX512 CXXFLAGS="-xCOMMON-AVX512 -O3 -std=c++11" --host=none LIBS="-lgmp -lmpfr" --enable-comms=none
|
CXX=icpc ../../configure --enable-simd=AVX512 CXXFLAGS="-xCOMMON-AVX512 -O3 -std=c++11" --host=none LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||||
;;
|
;;
|
||||||
icpc-mic)
|
icpc-mic)
|
||||||
CXX=icpc ../../configure --host=none --enable-simd=AVX512 CXXFLAGS="-mmic -O3 -std=c++11" LDFLAGS=-mmic LIBS="-lgmp -lmpfr" --enable-comms=none
|
CXX=icpc ../../configure --host=none --enable-simd=IMCI CXXFLAGS="-mmic -O3 -std=c++11" LDFLAGS=-mmic LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||||
|
;;
|
||||||
|
icpc-mic-avx512)
|
||||||
|
CXX=icpc ../../configure --host=none --enable-simd=IMCI CXXFLAGS="-xCOMMON_AVX512 -O3 -std=c++11" LDFLAGS=-xCOMMON_AVX512 LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||||
;;
|
;;
|
||||||
clang-sse)
|
clang-sse)
|
||||||
CXX=clang++ ../../configure --enable-simd=SSE4 CXXFLAGS="-msse4 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
CXX=clang++ ../../configure --enable-precision=single --enable-simd=SSE4 CXXFLAGS="-msse4 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||||
;;
|
;;
|
||||||
clang-avx)
|
clang-avx)
|
||||||
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||||
|
@@ -1,6 +1,35 @@
 #!/bin/bash

 LOG=$1
-SWEEPS=$2
-grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10} END { print S/NR} '
-grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+exp(-$10)} END { print S/NR} '
+SWEEPS=`grep dH $LOG | wc -l`
+SWEEPS=`expr $SWEEPS - 80`
+echo
+echo $SWEEPS thermalised sweeps
+echo
+plaq=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10} END { print S/NR} ' `
+plaqe=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10 ; SS=SS+$10*$10 } END { print sqrt( (SS/NR - S*S/NR/NR)/NR) } ' `
+echo "Plaquette: $plaq (${plaqe})"
+echo
+
+dHv=`grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+$10 ; SS=SS+$10*$10 } END { print sqrt(SS/NR) } ' `
+edH=`grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+exp(-$10)} END { print S/NR} '`
+echo "<e-dH>: $edH"
+echo "<rms dH>: $dHv"
+
+TRAJ=`grep Acc $LOG | wc -l`
+ACC=`grep Acc $LOG | grep ACCE | wc -l`
+PACC=`expr 100 \* ${ACC} / ${TRAJ} `
+echo
+echo "Acceptance $PACC % $ACC / $TRAJ "
+
+grep Plaq $LOG | awk '{ print $10 }' | uniq > plaq.dat
+grep dH $LOG | awk '{ print $10 }' > dH.dat
+echo set yrange [-0.1:0.6] > plot.gnu
+echo set terminal 'pdf' >> plot.gnu
+echo "set output 'plaq.${LOG}.pdf'" >> plot.gnu
+echo "plot 'plaq.dat' w l, 'dH.dat' w l " >> plot.gnu
+echo
+gnuplot plot.gnu >& gnu.errs
+open plaq.${LOG}.pdf
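A usage sketch for the reworked script (the script and log file names below are placeholders, since the hunk does not show the file name). The log is now the only argument; the sweep count is derived from the number of dH lines, with the first 80 dropped as thermalisation:

  ./hmc_stats.sh hmc_run.log        # hypothetical names
  # prints thermalised sweep count, plaquette with naive error, <e-dH>,
  # rms dH and acceptance, and writes plaq.hmc_run.log.pdf via gnuplot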
@@ -10,7 +10,7 @@ shift
 while (( "$#" )); do

 echo $1
-sed -e "s:${WAS}:${IS}:g" -i .bak $1
+sed -e "s@${WAS}@${IS}@g" -i .bak $1

 shift

@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_gparity Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
|
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
|
||||||
|
|
||||||
|
|
||||||
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
|
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
|
||||||
@ -86,12 +86,16 @@ Test_dwf_hdcr_SOURCES=Test_dwf_hdcr.cc
|
|||||||
Test_dwf_hdcr_LDADD=-lGrid
|
Test_dwf_hdcr_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
|
#Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
|
||||||
|
#Test_dwf_lanczos_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Test_gamma_SOURCES=Test_gamma.cc
|
Test_gamma_SOURCES=Test_gamma.cc
|
||||||
Test_gamma_LDADD=-lGrid
|
Test_gamma_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Test_gparity_SOURCES=Test_gparity.cc
|
#Test_gparity_SOURCES=Test_gparity.cc
|
||||||
Test_gparity_LDADD=-lGrid
|
#Test_gparity_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
|
Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
|
||||||
@ -174,6 +178,10 @@ Test_rng_fixed_SOURCES=Test_rng_fixed.cc
|
|||||||
Test_rng_fixed_LDADD=-lGrid
|
Test_rng_fixed_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
|
Test_serialisation_SOURCES=Test_serialisation.cc
|
||||||
|
Test_serialisation_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Test_simd_SOURCES=Test_simd.cc
|
Test_simd_SOURCES=Test_simd.cc
|
||||||
Test_simd_LDADD=-lGrid
|
Test_simd_LDADD=-lGrid
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ int main (int argc, char ** argv)
|
|||||||
NerscField header;
|
NerscField header;
|
||||||
|
|
||||||
std::string file("./ckpoint_lat.4000");
|
std::string file("./ckpoint_lat.4000");
|
||||||
readNerscConfiguration(Umu,header,file);
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
@ -42,7 +42,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
NerscField header;
|
NerscField header;
|
||||||
std::string file("./ckpoint_lat.400");
|
std::string file("./ckpoint_lat.400");
|
||||||
readNerscConfiguration(Umu,header,file);
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
// SU3::ColdConfiguration(RNG4,Umu);
|
// SU3::ColdConfiguration(RNG4,Umu);
|
||||||
// SU3::TepidConfiguration(RNG4,Umu);
|
// SU3::TepidConfiguration(RNG4,Umu);
|
||||||
|
@ -388,7 +388,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
NerscField header;
|
NerscField header;
|
||||||
std::string file("./ckpoint_lat.4000");
|
std::string file("./ckpoint_lat.4000");
|
||||||
readNerscConfiguration(Umu,header,file);
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
// SU3::ColdConfiguration(RNG4,Umu);
|
// SU3::ColdConfiguration(RNG4,Umu);
|
||||||
// SU3::TepidConfiguration(RNG4,Umu);
|
// SU3::TepidConfiguration(RNG4,Umu);
|
||||||
|
tests/Test_dwf_lanczos.cc (new file, 57 lines)
@@ -0,0 +1,57 @@
|
#include <Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=8;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
|
LatticeFermion src(FGrid); gaussian(RNG5,src);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
SU3::HotConfiguration(RNG4, Umu);
|
||||||
|
|
||||||
|
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||||
|
|
||||||
|
const int Nk = 10;
|
||||||
|
const int Np = 1;
|
||||||
|
RealD enorm = 1.0;
|
||||||
|
RealD vthrs = 1;
|
||||||
|
const int Nit= 1000;
|
||||||
|
|
||||||
|
ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,PolyX,
|
||||||
|
Nk,Np,enorm,vthrs,Nit);
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<RealD> eval(Nk);
|
||||||
|
std::vector<LatticeFermion> evec(Nk,FGrid);
|
||||||
|
IRL.calc(eval,evec,
|
||||||
|
src,
|
||||||
|
Nsbt,
|
||||||
|
Nconv);
|
||||||
|
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -17,7 +17,9 @@ int main (int argc, char ** argv)
|
|||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
GridSerialRNG sRNG;
|
||||||
GridParallelRNG pRNG(UGrid);
|
GridParallelRNG pRNG(UGrid);
|
||||||
|
sRNG.SeedRandomDevice();
|
||||||
pRNG.SeedRandomDevice();
|
pRNG.SeedRandomDevice();
|
||||||
|
|
||||||
LatticeLorentzColourMatrix U(UGrid);
|
LatticeLorentzColourMatrix U(UGrid);
|
||||||
@ -25,7 +27,7 @@ int main (int argc, char ** argv)
|
|||||||
SU3::HotConfiguration(pRNG, U);
|
SU3::HotConfiguration(pRNG, U);
|
||||||
|
|
||||||
// simplify template declaration? Strip the lorentz from the second template
|
// simplify template declaration? Strip the lorentz from the second template
|
||||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
WilsonGaugeActionR Waction(5.6);
|
||||||
|
|
||||||
Real mass=0.04;
|
Real mass=0.04;
|
||||||
Real pv =1.0;
|
Real pv =1.0;
|
||||||
@ -38,21 +40,20 @@ int main (int argc, char ** argv)
|
|||||||
TwoFlavourEvenOddRatioPseudoFermionAction<WilsonImplR> Nf2(NumOp, DenOp,CG,CG);
|
TwoFlavourEvenOddRatioPseudoFermionAction<WilsonImplR> Nf2(NumOp, DenOp,CG,CG);
|
||||||
|
|
||||||
//Collect actions
|
//Collect actions
|
||||||
ActionLevel Level1;
|
ActionLevel<LatticeGaugeField> Level1;
|
||||||
Level1.push_back(&Nf2);
|
Level1.push_back(&Nf2);
|
||||||
Level1.push_back(&Waction);
|
Level1.push_back(&Waction);
|
||||||
ActionSet FullSet;
|
ActionSet<LatticeGaugeField> FullSet;
|
||||||
FullSet.push_back(Level1);
|
FullSet.push_back(Level1);
|
||||||
|
|
||||||
// Create integrator
|
// Create integrator
|
||||||
// typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
typedef MinimumNorm2<LatticeGaugeField> IntegratorType;// change here to change the algorithm
|
||||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
IntegratorParameters MDpar(20);
|
||||||
IntegratorParameters MDpar(12,20,1.0);
|
IntegratorType MDynamics(UGrid,MDpar, FullSet);
|
||||||
Integrator<IntegratorAlgorithm> MDynamics(UGrid,MDpar, FullSet);
|
|
||||||
|
|
||||||
// Create HMC
|
// Create HMC
|
||||||
HMCparameters HMCpar;
|
HMCparameters HMCpar;
|
||||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
HybridMonteCarlo<LatticeGaugeField,IntegratorType> HMC(HMCpar, MDynamics,sRNG,pRNG);
|
||||||
|
|
||||||
// Run it
|
// Run it
|
||||||
HMC.evolve(U);
|
HMC.evolve(U);
|
||||||
|
@ -15,40 +15,50 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<int> seeds({6,7,8,80});
|
||||||
GridParallelRNG pRNG(&Fine);
|
GridParallelRNG pRNG(&Fine);
|
||||||
pRNG.SeedRandomDevice();
|
pRNG.SeedFixedIntegers(seeds);
|
||||||
|
|
||||||
|
std::vector<int> seedsS({1,2,3,4});
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
sRNG.SeedFixedIntegers(seedsS);
|
||||||
|
|
||||||
LatticeLorentzColourMatrix U(&Fine);
|
LatticeLorentzColourMatrix U(&Fine);
|
||||||
|
|
||||||
SU3::HotConfiguration(pRNG, U);
|
SU3::HotConfiguration(pRNG, U);
|
||||||
|
|
||||||
// simplify template declaration? Strip the lorentz from the second template
|
// simplify template declaration? Strip the lorentz from the second template
|
||||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
WilsonGaugeActionR Waction(5.6);
|
||||||
|
|
||||||
Real mass=-0.77;
|
Real mass=-0.77;
|
||||||
WilsonFermionR FermOp(U,Fine,RBFine,mass);
|
WilsonFermionR FermOp(U,Fine,RBFine,mass);
|
||||||
|
|
||||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||||
ConjugateGradient<LatticeFermion> CGmd(1.0e-6,10000);
|
|
||||||
|
|
||||||
TwoFlavourEvenOddPseudoFermionAction<WilsonImplR> WilsonNf2(FermOp,CG,CG);
|
TwoFlavourEvenOddPseudoFermionAction<WilsonImplR> WilsonNf2(FermOp,CG,CG);
|
||||||
|
|
||||||
//Collect actions
|
//Collect actions
|
||||||
ActionLevel Level1;
|
ActionLevel<LatticeGaugeField> Level1(1);
|
||||||
|
ActionLevel<LatticeGaugeField> Level2(4);
|
||||||
Level1.push_back(&WilsonNf2);
|
Level1.push_back(&WilsonNf2);
|
||||||
Level1.push_back(&Waction);
|
Level2.push_back(&Waction);
|
||||||
ActionSet FullSet;
|
|
||||||
|
ActionSet<LatticeGaugeField> FullSet;
|
||||||
FullSet.push_back(Level1);
|
FullSet.push_back(Level1);
|
||||||
|
FullSet.push_back(Level2);
|
||||||
|
|
||||||
// Create integrator
|
// Create integrator
|
||||||
// typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
//typedef LeapFrog<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||||
typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
//typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||||
IntegratorParameters MDpar(12,40,1.0);
|
typedef ForceGradient<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||||
std::vector<int> rel ={1};
|
|
||||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
IntegratorParameters MDpar(16);
|
||||||
|
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||||
|
|
||||||
// Create HMC
|
// Create HMC
|
||||||
HMCparameters HMCpar;
|
HMCparameters HMCpar;
|
||||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics,sRNG,pRNG);
|
||||||
|
|
||||||
HMC.evolve(U);
|
HMC.evolve(U);
|
||||||
|
|
||||||
|
@ -15,14 +15,21 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<int> seeds({6,7,8,80});
|
||||||
GridParallelRNG pRNG(&Fine);
|
GridParallelRNG pRNG(&Fine);
|
||||||
pRNG.SeedRandomDevice();
|
pRNG.SeedFixedIntegers(seeds);
|
||||||
|
|
||||||
|
std::vector<int> seedsS({1,2,3,4});
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
sRNG.SeedFixedIntegers(seedsS);
|
||||||
|
|
||||||
LatticeLorentzColourMatrix U(&Fine);
|
LatticeLorentzColourMatrix U(&Fine);
|
||||||
|
|
||||||
SU3::HotConfiguration(pRNG, U);
|
SU3::HotConfiguration(pRNG, U);
|
||||||
|
|
||||||
// simplify template declaration? Strip the lorentz from the second template
|
// simplify template declaration? Strip the lorentz from the second template
|
||||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
WilsonGaugeActionR Waction(5.6);
|
||||||
|
|
||||||
Real mass=-0.77;
|
Real mass=-0.77;
|
||||||
Real pv =0.0;
|
Real pv =0.0;
|
||||||
@@ -34,22 +41,22 @@ int main (int argc, char ** argv)
   TwoFlavourEvenOddRatioPseudoFermionAction<WilsonImplR> WilsonNf2(NumOp, DenOp,CG,CG);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1;
   Level1.push_back(&WilsonNf2);
   Level1.push_back(&Waction);
-  ActionSet FullSet;
+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
   // typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
-  IntegratorParameters MDpar(12,20,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics,sRNG,pRNG);

   HMC.evolve(U);

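The same hunks also retemplate the molecular-dynamics machinery on the gauge field type. A sketch of the resulting construction, assuming the Waction, WilsonNf2, Fine, U, sRNG and pRNG objects defined earlier in the test:

    ActionLevel<LatticeGaugeField> Level1;
    Level1.push_back(&WilsonNf2);
    Level1.push_back(&Waction);
    ActionSet<LatticeGaugeField> FullSet;
    FullSet.push_back(Level1);

    typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;
    IntegratorParameters MDpar(20);   // single-parameter form used throughout this commit
    IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

    HMCparameters HMCpar;
    HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
    HMC.evolve(U);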
@@ -15,40 +15,49 @@ int main (int argc, char ** argv)

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);
   GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);

+  std::vector<int> seeds({6,7,8,80});
   GridParallelRNG pRNG(&Fine);
-  pRNG.SeedRandomDevice();
+  pRNG.SeedFixedIntegers(seeds);

+  std::vector<int> seedsS({1,2,3,4});
+  GridSerialRNG sRNG;
+  sRNG.SeedFixedIntegers(seedsS);

   LatticeLorentzColourMatrix U(&Fine);

   SU3::HotConfiguration(pRNG, U);

   // simplify template declaration? Strip the lorentz from the second template
-  WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
+  WilsonGaugeActionR Waction(5.6);

   Real mass=-0.77;
   WilsonFermionR FermOp(U,Fine,RBFine,mass);

   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
-  ConjugateGradient<LatticeFermion> CGmd(1.0e-6,10000);

   TwoFlavourPseudoFermionAction<WilsonImplR> WilsonNf2(FermOp,CG,CG);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1(1);
+  ActionLevel<LatticeGaugeField> Level2(4);
   Level1.push_back(&WilsonNf2);
   Level1.push_back(&Waction);
-  ActionSet FullSet;
+  // Level1.push_back(&Waction);

+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);
   // FullSet.push_back(Level2);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
   // typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
-  IntegratorParameters MDpar(12,20,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics,sRNG,pRNG);

   HMC.evolve(U);

@@ -22,30 +22,37 @@ int main (int argc, char ** argv)
   double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);

+  std::vector<int> seeds({6,7,8,80});
   GridParallelRNG pRNG(&Fine);
-  pRNG.SeedRandomDevice();
+  pRNG.SeedFixedIntegers(seeds);

+  std::vector<int> seedsS({1,2,3,4});
+  GridSerialRNG sRNG;
+  sRNG.SeedFixedIntegers(seedsS);

   LatticeGaugeField U(&Fine);

   SU3::HotConfiguration(pRNG, U);

   // simplify template declaration? Strip the lorentz from the second template
-  WilsonGaugeAction<LatticeGaugeField, LatticeColourMatrix> Waction(6.0);
+  WilsonGaugeActionR Waction(6.0);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1;
   Level1.push_back(&Waction);
-  ActionSet FullSet;
+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to modify the algorithm
-  IntegratorParameters MDpar(12,5,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to modify the algorithm
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);

   HMC.evolve(U);

@@ -15,14 +15,21 @@ int main (int argc, char ** argv)

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);
   GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);

+  std::vector<int> seeds({6,7,8,80});
   GridParallelRNG pRNG(&Fine);
-  pRNG.SeedRandomDevice();
+  pRNG.SeedFixedIntegers(seeds);

+  std::vector<int> seedsS({1,2,3,4});
+  GridSerialRNG sRNG;
+  sRNG.SeedFixedIntegers(seedsS);

   LatticeLorentzColourMatrix U(&Fine);

   SU3::HotConfiguration(pRNG, U);

   // simplify template declaration? Strip the lorentz from the second template
-  WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
+  WilsonGaugeActionR Waction(5.6);

   Real mass=-0.77;
   Real pv =0.0;
@@ -34,22 +41,22 @@ int main (int argc, char ** argv)
   TwoFlavourRatioPseudoFermionAction<WilsonImplR> WilsonNf2(NumOp,FermOp,CG,CG);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1;
   Level1.push_back(&WilsonNf2);
   Level1.push_back(&Waction);
-  ActionSet FullSet;
+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
   // typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
-  IntegratorParameters MDpar(12,20,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);

   HMC.evolve(U);

@@ -17,7 +17,8 @@ public:
     pRNG.SeedFixedIntegers(seeds);

     random(pRNG,sqrtscale);
-    sqrtscale = real(sqrtscale)*3.0+0.5;// force real pos def
+    sqrtscale = 0.5*(sqrtscale + conjugate(sqrtscale));
+    sqrtscale = sqrtscale*3.0+0.5;// force real pos def
     scale = sqrtscale *sqrtscale; //scale should be bounded by 12.25

     //
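The rewritten lines make the real projection explicit instead of calling real(): for any complex z, 0.5*(z + conjugate(z)) = Re(z). Assuming random() fills sqrtscale with values whose real part lies in [0,1], the subsequent *3.0+0.5 step bounds sqrtscale by 3.5, so scale = sqrtscale*sqrtscale is bounded by 3.5*3.5 = 12.25, as the trailing comment states.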
@@ -28,7 +28,7 @@ int main (int argc, char ** argv)

   NerscField header;
   std::string file("./ckpoint_lat.4000");
-  readNerscConfiguration(Umu,header,file);
+  NerscIO::readConfiguration(Umu,header,file);

   for(int mu=0;mu<Nd;mu++){
     U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
@@ -89,6 +89,13 @@ int main (int argc, char ** argv)
   TComplex TcP = sum(cPlaq);
   Complex ll= TensorRemove(TcP);
   std::cout<<GridLogMessage << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;

+  std::string clone2x3("./ckpoint_clone2x3.4000");
+  std::string clone3x3("./ckpoint_clone3x3.4000");
+  int precision32 = 0;

+  NerscIO::writeConfiguration(Umu,clone3x3,0,precision32);
+  NerscIO::writeConfiguration(Umu,clone2x3,1,precision32);

   Grid_finalize();
 }
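The NERSC I/O entry points move behind the NerscIO namespace, and the test now writes the configuration back out in two clone formats. A sketch of the round trip using only the calls that appear in the hunk; the meaning of writeConfiguration's third argument (0 for the 3x3 clone, 1 for the 2x3 clone) is inferred from the filenames, and precision32 = 0 presumably selects 64-bit output:

    LatticeGaugeField Umu(&Fine);
    NerscField header;

    // read an existing NERSC checkpoint
    NerscIO::readConfiguration(Umu, header, std::string("./ckpoint_lat.4000"));

    // write it back out in both storage formats (argument meanings inferred, see above)
    int precision32 = 0;
    NerscIO::writeConfiguration(Umu, std::string("./ckpoint_clone3x3.4000"), 0, precision32);
    NerscIO::writeConfiguration(Umu, std::string("./ckpoint_clone2x3.4000"), 1, precision32);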
@@ -73,8 +73,10 @@ int main (int argc, char ** argv)
   }

   PokeIndex<LorentzIndex>(Umu,link,mu);
-  //reunitarise link;

+  //reunitarise link;
+  ProjectOnGroup(Umu);

  }

 }
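Here the dormant reunitarise comment becomes an actual call: after the modified link is poked back into Umu, ProjectOnGroup(Umu) projects the field back onto the group. In isolation the pattern is simply:

    PokeIndex<LorentzIndex>(Umu,link,mu);   // write the altered link back
    ProjectOnGroup(Umu);                    // reunitarise the whole field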
@@ -15,14 +15,22 @@ int main (int argc, char ** argv)

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);
   GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);

+  std::vector<int> seeds({6,7,8,80});
   GridParallelRNG pRNG(&Fine);
-  pRNG.SeedRandomDevice();
+  pRNG.SeedFixedIntegers(seeds);

+  std::vector<int> seedsS({1,2,3,4});
+  GridSerialRNG sRNG;
+  sRNG.SeedFixedIntegers(seedsS);

   LatticeLorentzColourMatrix U(&Fine);

   SU3::HotConfiguration(pRNG, U);

   // simplify template declaration? Strip the lorentz from the second template
-  WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
+  WilsonGaugeActionR Waction(5.6);

   Real mass=-0.77;
   WilsonFermionR FermOp(U,Fine,RBFine,mass);
@@ -33,23 +41,23 @@ int main (int argc, char ** argv)
   OneFlavourEvenOddRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(FermOp,Params);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1;
   Level1.push_back(&WilsonNf1a);
   Level1.push_back(&WilsonNf1b);
   Level1.push_back(&Waction);

-  ActionSet FullSet;
+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm

-  IntegratorParameters MDpar(12,20,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
   HMC.evolve(U);

 }
@@ -15,14 +15,22 @@ int main (int argc, char ** argv)

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);
   GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);

+  std::vector<int> seeds({6,7,8,80});
   GridParallelRNG pRNG(&Fine);
-  pRNG.SeedRandomDevice();
+  pRNG.SeedFixedIntegers(seeds);

+  std::vector<int> seedsS({1,2,3,4});
+  GridSerialRNG sRNG;
+  sRNG.SeedFixedIntegers(seedsS);

   LatticeLorentzColourMatrix U(&Fine);

   SU3::HotConfiguration(pRNG, U);

   // simplify template declaration? Strip the lorentz from the second template
-  WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
+  WilsonGaugeActionR Waction(5.6);

   Real mass=-0.77;
   Real pv =0.0;
@@ -35,23 +43,23 @@ int main (int argc, char ** argv)
   OneFlavourEvenOddRatioRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(NumOp,DenOp,Params);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1;
   Level1.push_back(&WilsonNf1a);
   Level1.push_back(&WilsonNf1b);
   Level1.push_back(&Waction);
-  ActionSet FullSet;
+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
   // typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
-  IntegratorParameters MDpar(12,20,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);

   HMC.evolve(U);

@@ -15,14 +15,22 @@ int main (int argc, char ** argv)

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);
   GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);

+  std::vector<int> seeds({6,7,8,80});
   GridParallelRNG pRNG(&Fine);
-  pRNG.SeedRandomDevice();
+  pRNG.SeedFixedIntegers(seeds);

+  std::vector<int> seedsS({1,2,3,4});
+  GridSerialRNG sRNG;
+  sRNG.SeedFixedIntegers(seedsS);

   LatticeLorentzColourMatrix U(&Fine);

   SU3::HotConfiguration(pRNG, U);

   // simplify template declaration? Strip the lorentz from the second template
-  WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
+  WilsonGaugeActionR Waction(5.6);

   Real mass=-0.77;
   WilsonFermionR FermOp(U,Fine,RBFine,mass);
@@ -33,23 +41,23 @@ int main (int argc, char ** argv)
   OneFlavourRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(FermOp,Params);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1;
   Level1.push_back(&WilsonNf1a);
   Level1.push_back(&WilsonNf1b);
   Level1.push_back(&Waction);

-  ActionSet FullSet;
+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm

-  IntegratorParameters MDpar(12,20,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);

   HMC.evolve(U);

@@ -15,14 +15,22 @@ int main (int argc, char ** argv)

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);
   GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);

+  std::vector<int> seeds({6,7,8,80});
   GridParallelRNG pRNG(&Fine);
-  pRNG.SeedRandomDevice();
+  pRNG.SeedFixedIntegers(seeds);

+  std::vector<int> seedsS({1,2,3,4});
+  GridSerialRNG sRNG;
+  sRNG.SeedFixedIntegers(seedsS);

   LatticeLorentzColourMatrix U(&Fine);

   SU3::HotConfiguration(pRNG, U);

   // simplify template declaration? Strip the lorentz from the second template
-  WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
+  WilsonGaugeActionR Waction(5.6);

   Real mass=-0.77;
   Real pv =0.0;
@@ -35,23 +43,23 @@ int main (int argc, char ** argv)
   OneFlavourRatioRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(NumOp,DenOp,Params);

   //Collect actions
-  ActionLevel Level1;
+  ActionLevel<LatticeGaugeField> Level1;
   Level1.push_back(&WilsonNf1a);
   Level1.push_back(&WilsonNf1b);
   Level1.push_back(&Waction);
-  ActionSet FullSet;
+  ActionSet<LatticeGaugeField> FullSet;
   FullSet.push_back(Level1);

   // Create integrator
-  typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
+  typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
   // typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
-  IntegratorParameters MDpar(12,20,1.0);
-  Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
+  IntegratorParameters MDpar(20);
+  IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);

   // Create HMC
   HMCparameters HMCpar;
-  HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
+  HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);

   HMC.evolve(U);

 tests/Test_serialisation.cc | 93 (new file)
@@ -0,0 +1,93 @@
+#include <Grid.h>
+
+namespace Grid {
+
+class myclass {
+public:
+
+  GRID_DECL_CLASS_MEMBERS(myclass,
+                          int, x,
+                          double, y,
+                          bool , b,
+                          std::string, name,
+                          std::vector<double>, array,
+                          std::vector<std::vector<double> >, twodimarray,
+                          );
+
+  myclass(){}
+  myclass(int i) : array(4,5.1), twodimarray(3,std::vector<double>(2,1.23456)) {
+    x=i;
+    y=2*i;
+    b=true;
+    name="bother said pooh";
+  }
+
+};
+
+}
+
+int16_t  i16 = 1;
+uint16_t u16 = 2;
+int32_t  i32 = 3;
+uint32_t u32 = 4;
+int64_t  i64 = 5;
+uint64_t u64 = 6;
+float    f   = M_PI;
+double   d   = 2*M_PI;
+bool     b   = false;
+
+using namespace Grid;
+
+int main(int argc,char **argv)
+{
+  {
+    XMLWriter WR("bother.xml");
+
+    push(WR,"BasicTypes");
+    write(WR,std::string("i16"),i16);
+    write(WR,"u16",u16);
+    write(WR,"i32",i32);
+    write(WR,"u32",u32);
+    write(WR,"i64",i64);
+    write(WR,"u64",u64);
+    write(WR,"f",f);
+    write(WR,"d",d);
+    write(WR,"b",b);
+    pop(WR);
+
+    myclass obj(1234); // non-trivial constructor
+    write(WR,"obj",obj);
+
+  };
+
+  XMLReader RD("bother.xml");
+
+  myclass copy1;
+  myclass copy2;
+  myclass copy3;
+
+  read(RD,"obj",copy1);
+  std::cout << "Loaded " << copy1 << std::endl;
+
+  {
+    BinaryWriter BWR("bother.bin");
+    write(BWR,"discard",copy1 );
+  }
+  {
+    BinaryReader BRD("bother.bin");
+    read (BRD,"discard",copy2 );
+    std::cout << copy2 << std::endl;
+  }
+
+  {
+    TextWriter TWR("bother.txt");
+    write(TWR,"discard",copy1 );
+  }
+  {
+    TextReader TRD("bother.txt");
+    read (TRD,"discard",copy3 );
+    std::cout << copy3 << std::endl;
+  }
+
+}
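The new test drives Grid's macro-generated serialisation: GRID_DECL_CLASS_MEMBERS declares the listed members and, judging by the test, the read/write/stream hooks that let one object round-trip through XMLWriter/XMLReader, BinaryWriter/BinaryReader and TextWriter/TextReader with the same write()/read() calls. A hypothetical user type (illustrative only, not part of the commit) would follow the same shape:

    // Hypothetical serialisable parameter pack, modelled on myclass above.
    class params {
    public:
      GRID_DECL_CLASS_MEMBERS(params,
                              double, beta,
                              int,    nsteps,
                              std::string, tag,
                              );
      params() {}
    };

    // Round trip through any of the writers, exactly as in the test:
    //   XMLWriter WR("params.xml");  write(WR,"pars",p);
    //   XMLReader RD("params.xml");  read (RD,"pars",p);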