mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
commit
d9f2e2e06a
119
TODO
119
TODO
@ -1,48 +1,48 @@
|
||||
- PseudoFermions
|
||||
RECENT
|
||||
---------------
|
||||
|
||||
Done: Cayley, Partial , ContFrac force terms.
|
||||
|
||||
Done:
|
||||
- TwoFlavour
|
||||
- TwoFlavourEvenOdd
|
||||
- TwoFlavourRatio
|
||||
- TwoFlavourRatioEvenOdd
|
||||
|
||||
Done:
|
||||
- OneFlavourRationalEvenOdd
|
||||
- OneFlavourRationalRatioEvenOdd
|
||||
- OneFlavourRationalRatio
|
||||
- Clean up HMC -- DONE
|
||||
- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
|
||||
- Simplified the integrators a bit. -- DONE
|
||||
- Multi-timescale looks broken and operating on single timescale for now. -- DONE
|
||||
- pass GaugeField as template param. -- DONE
|
||||
- Reunitarise -- DONE
|
||||
- Force Gradient -- DONE
|
||||
- Prefer "RefreshInternal" or such like to "init" in naming -- DONE
|
||||
- Parallel io improvements -- DONE
|
||||
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
|
||||
|
||||
TODO:
|
||||
=> generalise to non-const EE
|
||||
=> Test DWF HMC
|
||||
=> Clean up HMC
|
||||
- Fix a threading bug that has been introduced and prevents HMC running hybrid OMP mode
|
||||
---------------
|
||||
Policies:
|
||||
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
|
||||
* Support different boundary conditions (finite temp, chem. potential ... )
|
||||
* Support different fermion representations?
|
||||
- contained entirely within the integrator presently
|
||||
|
||||
=> Integrators
|
||||
- Force Gradient
|
||||
- Multi-timescale looks broken and operating on single timescale for now.
|
||||
Fix/debug/rewrite this
|
||||
- Sign of force term.
|
||||
- Prefer "RefreshInternal" or such like to "init" in naming
|
||||
- Rename "Ta" as too unclear
|
||||
- Sign of force term.
|
||||
|
||||
- MacroMagic -> virtual reader class.
|
||||
- Reversibility test.
|
||||
|
||||
- Link smearing/boundary conds; Policy class based implementation
|
||||
- Rename "Ta" as too unclear
|
||||
|
||||
- Lanczos
|
||||
|
||||
- Rectangle gauge actions.
|
||||
Iwasaki,
|
||||
Symanzik,
|
||||
... etc...
|
||||
|
||||
- Prepare multigrid for HMC.
|
||||
Alternate setup schemes.
|
||||
- Prepare multigrid for HMC. - Alternate setup schemes.
|
||||
|
||||
- RNG filling from sparser grid, lower dim grid.
|
||||
- Support for ILDG --- ugly, not done
|
||||
|
||||
- Flavour matrices?
|
||||
|
||||
- FFTnD ?
|
||||
|
||||
================================================================
|
||||
*** Hacks and bug fixes to clean up and Audits
|
||||
* Hacks and bug fixes to clean up and Audits
|
||||
================================================================
|
||||
|
||||
* Extract/merge/set cleanup ; too many variants; rationalise and call simpler ones
|
||||
@ -68,6 +68,8 @@ Insert/Extract
|
||||
|
||||
* Thread scaling tests Xeon, XeonPhi
|
||||
|
||||
Not sure of status of this -- reverify. Things are working nicely now though.
|
||||
|
||||
* Make the Tensor types and Complex etc... play more nicely.
|
||||
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
||||
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
|
||||
@ -84,6 +86,37 @@ Insert/Extract
|
||||
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
|
||||
pure scalar.
|
||||
|
||||
======================================================================
|
||||
======================================================================
|
||||
======================================================================
|
||||
======================================================================
|
||||
Done: Cayley, Partial , ContFrac force terms.
|
||||
|
||||
DONE
|
||||
- PseudoFermions
|
||||
=> generalise to non-const EE ; likely defer (??) (NOT DONE)
|
||||
Done:
|
||||
- TwoFlavour
|
||||
- TwoFlavourEvenOdd
|
||||
- TwoFlavourRatio
|
||||
- TwoFlavourRatioEvenOdd
|
||||
|
||||
Done:
|
||||
- OneFlavourRationalEvenOdd
|
||||
- OneFlavourRationalRatioEvenOdd
|
||||
- OneFlavourRationalRatio
|
||||
|
||||
Done
|
||||
=> Test DWF HMC
|
||||
- Fix a threading bug that has been introduced and prevents HMC running hybrid OMP mode
|
||||
|
||||
Done:
|
||||
- RNG filling from sparser grid, lower dim grid.
|
||||
|
||||
|
||||
DONE
|
||||
- MacroMagic -> virtual reader class.
|
||||
|
||||
*** Expression template engine: -- DONE
|
||||
|
||||
[ -- Norm2(expression) problem: introduce norm2 unary op, or Introduce conversion automatic from expression to Lattice<vobj>
|
||||
@ -103,18 +136,10 @@ Insert/Extract
|
||||
|
||||
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
||||
|
||||
* Parallel io improvements
|
||||
- optional parallel MPI2 IO
|
||||
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
|
||||
|
||||
* Support for ILDG
|
||||
|
||||
* Support different boundary conditions (finite temp, chem. potential ... )
|
||||
|
||||
* Support different fermion representations?
|
||||
|
||||
Actions -- coherent framework for implementing actions and their forces.
|
||||
-- coherent framework for implementing actions and their forces.
|
||||
Actions
|
||||
|
||||
DONE
|
||||
* Fermion
|
||||
- Wilson
|
||||
- Clover
|
||||
@ -122,6 +147,7 @@ Actions -- coherent framework for implementing actions and their forces.
|
||||
- Mobius
|
||||
- z-Mobius
|
||||
|
||||
|
||||
Algorithms (lots of reuse/port from BFM)
|
||||
* LinearOperator
|
||||
* LinearSolver
|
||||
@ -139,19 +165,11 @@ Algorithms (lots of reuse/port from BFM)
|
||||
* Integrators, leapfrog, omelyan, force gradient etc...
|
||||
* etc..
|
||||
|
||||
* Gauge
|
||||
- Wilson, symanzik, iwasaki
|
||||
|
||||
* Flavour matrices?
|
||||
|
||||
Done
|
||||
* Pauli, SU subgroup, etc..
|
||||
|
||||
* su3 exponentiation & log etc.. [Jamie's code?]
|
||||
|
||||
* TaProj
|
||||
|
||||
* FFTnD ?
|
||||
|
||||
======================================================================================================
|
||||
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
|
||||
======================================================================================================
|
||||
@ -184,7 +202,6 @@ FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me argua
|
||||
- lib/communicator
|
||||
- lib/algorithms
|
||||
- lib/qcd
|
||||
future
|
||||
- lib/io/ -- GridLog, GridIn, GridErr, GridDebug, GridMessage
|
||||
- lib/qcd/actions
|
||||
- lib/qcd/measurements
|
||||
|
84
benchmarks/Benchmark_memory_asynch.cc
Normal file
84
benchmarks/Benchmark_memory_asynch.cc
Normal file
@ -0,0 +1,84 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
const int Nvec=4;
|
||||
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||
typedef iVector<vReal,Nvec> Vec;
|
||||
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"bytes/thread"<<"\t\t\t"<<"GB/s"<<"\t\t\t"<<"GB/s per thread"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
const int lmax = 16536*16;
|
||||
for(int lat=4;lat<=lmax;lat*=2){
|
||||
|
||||
int Nloop=lmax*128*4/lat;
|
||||
|
||||
std::vector<int> latt_size ({2*mpi_layout[0],2*mpi_layout[1],4*mpi_layout[2],lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]*threads;
|
||||
|
||||
Vec tsum; tsum = zero;
|
||||
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
std::vector<double> stop(threads);
|
||||
Vector<Vec> sum(threads);
|
||||
|
||||
std::vector<LatticeVec> x(threads,&Grid);
|
||||
for(int t=0;t<threads;t++){
|
||||
random(pRNG,x[t]);
|
||||
}
|
||||
|
||||
double start=usecond();
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int t=0;t<threads;t++){
|
||||
|
||||
sum[t] = x[t]._odata[0];
|
||||
for(int i=0;i<Nloop;i++){
|
||||
for(auto ss=x[t].begin();ss<x[t].end();ss++){
|
||||
sum[t] = sum[t]+x[t]._odata[ss];
|
||||
}
|
||||
}
|
||||
stop[t]=usecond();
|
||||
}
|
||||
|
||||
double max_stop=stop[0];
|
||||
double min_stop=stop[0];
|
||||
|
||||
for(int t=0;t<threads;t++){
|
||||
tsum+=sum[t];
|
||||
if ( stop[t]<min_stop ) min_stop=stop[t];
|
||||
if ( stop[t]>max_stop ) max_stop=stop[t];
|
||||
}
|
||||
|
||||
|
||||
|
||||
double max_time = (max_stop-start)/Nloop*1000;
|
||||
double min_time = (min_stop-start)/Nloop*1000;
|
||||
|
||||
double bytes=vol*Nvec*sizeof(Real);
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/threads<<"\t\t"<<bytes/max_time<<" - "<< bytes/min_time<<"\t\t"<<bytes/min_time/threads <<std::endl;
|
||||
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -10,144 +10,158 @@ int main (int argc, char ** argv)
|
||||
|
||||
const int Nvec=8;
|
||||
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||
typedef iVector<vReal,Nvec> Vec;
|
||||
|
||||
int Nloop=1000;
|
||||
|
||||
Vec rn = zero;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking fused AXPY bandwidth ; sizeof(Real) "<<sizeof(Real)<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
uint64_t lmax=44;
|
||||
#define NLOOP (100*lmax*lmax*lmax*lmax/vol)
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
uint64_t Nloop=NLOOP;
|
||||
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
double a=2.0;
|
||||
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
// inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
axpy(z,a,x,y);
|
||||
x._odata[0]=z._odata[0]; // serial loop dependence to prevent optimise
|
||||
y._odata[4]=z._odata[4];
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3*vol*Nvec*sizeof(Real);
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
double a=2.0;
|
||||
|
||||
uint64_t Nloop=NLOOP;
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=a*x-y;
|
||||
x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away
|
||||
y._odata[4]=z._odata[4];
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3*vol*Nvec*sizeof(Real);
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking SCALE bandwidth"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
uint64_t Nloop=NLOOP;
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
RealD a=2.0;
|
||||
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=a*x;
|
||||
x._odata[0]=z._odata[0]*2.0;
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double bytes=2*vol*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*1;// mul
|
||||
std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking READ bandwidth"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
uint64_t Nloop=NLOOP;
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
RealD a=2.0;
|
||||
ComplexD nn;
|
||||
|
||||
Real nn;
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
nn=norm2(x);
|
||||
vsplat(x._odata[0]._internal[0],nn);
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double bytes=vol*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
|
||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
|
||||
|
||||
|
||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
|
||||
@ -10,6 +10,10 @@ Benchmark_dwf_SOURCES=Benchmark_dwf.cc
|
||||
Benchmark_dwf_LDADD=-lGrid
|
||||
|
||||
|
||||
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
|
||||
Benchmark_memory_asynch_LDADD=-lGrid
|
||||
|
||||
|
||||
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
|
||||
Benchmark_memory_bandwidth_LDADD=-lGrid
|
||||
|
||||
|
123
configure
vendored
123
configure
vendored
@ -1384,9 +1384,9 @@ Optional Features:
|
||||
--disable-dependency-tracking
|
||||
speeds up one-time build
|
||||
--disable-openmp do not use OpenMP
|
||||
--enable-simd=SSE4|AVX|AVX2|AVX512|MIC
|
||||
--enable-simd=SSE4|AVX|AVX2|AVX512|IMCI
|
||||
Select instructions to be SSE4.0, AVX 1.0, AVX
|
||||
2.0+FMA, AVX 512, MIC
|
||||
2.0+FMA, AVX 512, IMCI
|
||||
--enable-precision=single|double
|
||||
Select default word size of Real
|
||||
--enable-comms=none|mpi Select communications
|
||||
@ -6359,105 +6359,15 @@ fi
|
||||
done
|
||||
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __gmpf_init in -lgmp" >&5
|
||||
$as_echo_n "checking for __gmpf_init in -lgmp... " >&6; }
|
||||
if ${ac_cv_lib_gmp___gmpf_init+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
ac_check_lib_save_LIBS=$LIBS
|
||||
LIBS="-lgmp $LIBS"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
/* Override any GCC internal prototype to avoid an error.
|
||||
Use char because int might match the return type of a GCC
|
||||
builtin and then its argument prototype would still apply. */
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
char __gmpf_init ();
|
||||
int
|
||||
main ()
|
||||
{
|
||||
return __gmpf_init ();
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_cxx_try_link "$LINENO"; then :
|
||||
ac_cv_lib_gmp___gmpf_init=yes
|
||||
else
|
||||
ac_cv_lib_gmp___gmpf_init=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
LIBS=$ac_check_lib_save_LIBS
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gmp___gmpf_init" >&5
|
||||
$as_echo "$ac_cv_lib_gmp___gmpf_init" >&6; }
|
||||
if test "x$ac_cv_lib_gmp___gmpf_init" = xyes; then :
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define HAVE_LIBGMP 1
|
||||
_ACEOF
|
||||
|
||||
LIBS="-lgmp $LIBS"
|
||||
|
||||
else
|
||||
as_fn_error $? "GNU Multiple Precision GMP library was not found in your system.
|
||||
Please install or provide the correct path to your installation
|
||||
Info at: http://www.gmplib.org" "$LINENO" 5
|
||||
fi
|
||||
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpfr_init in -lmpfr" >&5
|
||||
$as_echo_n "checking for mpfr_init in -lmpfr... " >&6; }
|
||||
if ${ac_cv_lib_mpfr_mpfr_init+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
ac_check_lib_save_LIBS=$LIBS
|
||||
LIBS="-lmpfr $LIBS"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
/* Override any GCC internal prototype to avoid an error.
|
||||
Use char because int might match the return type of a GCC
|
||||
builtin and then its argument prototype would still apply. */
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
char mpfr_init ();
|
||||
int
|
||||
main ()
|
||||
{
|
||||
return mpfr_init ();
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_cxx_try_link "$LINENO"; then :
|
||||
ac_cv_lib_mpfr_mpfr_init=yes
|
||||
else
|
||||
ac_cv_lib_mpfr_mpfr_init=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
LIBS=$ac_check_lib_save_LIBS
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpfr_mpfr_init" >&5
|
||||
$as_echo "$ac_cv_lib_mpfr_mpfr_init" >&6; }
|
||||
if test "x$ac_cv_lib_mpfr_mpfr_init" = xyes; then :
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define HAVE_LIBMPFR 1
|
||||
_ACEOF
|
||||
|
||||
LIBS="-lmpfr $LIBS"
|
||||
|
||||
else
|
||||
as_fn_error $? "GNU Multiple Precision MPFR library was not found in your system.
|
||||
Please install or provide the correct path to your installation
|
||||
Info at: http://www.mpfr.org/" "$LINENO" 5
|
||||
fi
|
||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||
#Please install or provide the correct path to your installation
|
||||
#Info at: http://www.gmplib.org)])
|
||||
|
||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||
#Please install or provide the correct path to your installation
|
||||
#Info at: http://www.mpfr.org/)])
|
||||
|
||||
# Check whether --enable-simd was given.
|
||||
if test "${enable_simd+set}" = set; then :
|
||||
@ -6504,13 +6414,20 @@ $as_echo "#define AVX2 1" >>confdefs.h
|
||||
$as_echo "$as_me: WARNING: Your processor does not support AVX2 instructions" >&2;}
|
||||
fi
|
||||
;;
|
||||
AVX512|MIC)
|
||||
echo Configuring for AVX512 and MIC
|
||||
AVX512)
|
||||
echo Configuring for AVX512
|
||||
|
||||
$as_echo "#define AVX512 1" >>confdefs.h
|
||||
|
||||
supported="cross compilation"
|
||||
;;
|
||||
IMCI)
|
||||
echo Configuring for IMCI
|
||||
|
||||
$as_echo "#define IMCI 1" >>confdefs.h
|
||||
|
||||
supported="cross compilation"
|
||||
;;
|
||||
NEONv8)
|
||||
echo Configuring for experimental ARMv8a support
|
||||
|
||||
@ -6533,7 +6450,7 @@ esac
|
||||
if test "${enable_precision+set}" = set; then :
|
||||
enableval=$enable_precision; ac_PRECISION=${enable_precision}
|
||||
else
|
||||
ac_PRECISION=single
|
||||
ac_PRECISION=double
|
||||
fi
|
||||
|
||||
case ${ac_PRECISION} in
|
||||
|
33
configure.ac
33
configure.ac
@ -55,18 +55,18 @@ echo :::::::::::::::::::::::::::::::::::::::::::
|
||||
|
||||
AC_CHECK_FUNCS([gettimeofday])
|
||||
|
||||
AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||
[AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||
Please install or provide the correct path to your installation
|
||||
Info at: http://www.gmplib.org)])
|
||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||
#Please install or provide the correct path to your installation
|
||||
#Info at: http://www.gmplib.org)])
|
||||
|
||||
AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||
[AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||
Please install or provide the correct path to your installation
|
||||
Info at: http://www.mpfr.org/)])
|
||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||
#Please install or provide the correct path to your installation
|
||||
#Info at: http://www.mpfr.org/)])
|
||||
|
||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
|
||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
|
||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|IMCI],\
|
||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
|
||||
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
|
||||
|
||||
supported=no
|
||||
@ -99,9 +99,14 @@ case ${ac_SIMD} in
|
||||
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
||||
fi
|
||||
;;
|
||||
AVX512|MIC)
|
||||
echo Configuring for AVX512 and MIC
|
||||
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] )
|
||||
AVX512)
|
||||
echo Configuring for AVX512
|
||||
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
|
||||
supported="cross compilation"
|
||||
;;
|
||||
IMCI)
|
||||
echo Configuring for IMCI
|
||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
||||
supported="cross compilation"
|
||||
;;
|
||||
NEONv8)
|
||||
@ -118,7 +123,7 @@ case ${ac_SIMD} in
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=single])
|
||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||
case ${ac_PRECISION} in
|
||||
single)
|
||||
echo default precision is single
|
||||
|
@ -17,6 +17,9 @@
|
||||
|
||||
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||
|
||||
// Lanczos support
|
||||
//#include <algorithms/iterative/MatrixUtils.h>
|
||||
//#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
|
||||
#include <algorithms/CoarsenedMatrix.h>
|
||||
|
||||
|
@ -72,7 +72,6 @@ operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return t
|
||||
|
||||
template<typename _Tp> inline bool
|
||||
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||
|
||||
|
||||
}; // namespace Grid
|
||||
#endif
|
||||
|
@ -6,7 +6,7 @@
|
||||
/* AVX2 Intrinsics */
|
||||
#undef AVX2
|
||||
|
||||
/* AVX512 Intrinsics for Knights Corner */
|
||||
/* AVX512 Intrinsics for Knights Landing */
|
||||
#undef AVX512
|
||||
|
||||
/* EMPTY_SIMD only for DEBUGGING */
|
||||
@ -56,12 +56,6 @@
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the `gmp' library (-lgmp). */
|
||||
#undef HAVE_LIBGMP
|
||||
|
||||
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
|
||||
#undef HAVE_LIBMPFR
|
||||
|
||||
/* Define to 1 if you have the <malloc.h> header file. */
|
||||
#undef HAVE_MALLOC_H
|
||||
|
||||
@ -116,6 +110,9 @@
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* IMCI Intrinsics for Knights Corner */
|
||||
#undef IMCI
|
||||
|
||||
/* NEON ARMv8 Experimental support */
|
||||
#undef NEONv8
|
||||
|
||||
|
@ -30,7 +30,7 @@
|
||||
///////////////////
|
||||
// Grid headers
|
||||
///////////////////
|
||||
#include <MacroMagic.h>
|
||||
#include <serialisation/Serialisation.h>
|
||||
#include <Config.h>
|
||||
#include <Timer.h>
|
||||
#include <Log.h>
|
||||
@ -45,6 +45,7 @@
|
||||
#include <Stencil.h>
|
||||
#include <Algorithms.h>
|
||||
#include <qcd/QCD.h>
|
||||
#include <parallelIO/BinaryIO.h>
|
||||
#include <parallelIO/NerscIO.h>
|
||||
|
||||
#include <Init.h>
|
||||
|
@ -177,7 +177,8 @@ void Grid_init(int *argc,char ***argv)
|
||||
Grid_quiesce_nodes();
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
|
||||
WilsonFermionStatic::HandOptDslash=1;
|
||||
QCD::WilsonFermionStatic::HandOptDslash=1;
|
||||
QCD::WilsonFermion5DStatic::HandOptDslash=1;
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
||||
LebesgueOrder::UseLebesgueOrder=1;
|
||||
|
@ -1,4 +1,4 @@
|
||||
|
||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Config.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./MacroMagic.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/NerscIO.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
|
||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Config.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
|
||||
|
||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/hmc/integrators/Integrator.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
||||
|
@ -13,6 +13,11 @@
|
||||
|
||||
typedef uint32_t Integer;
|
||||
|
||||
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D))
|
||||
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) ((A<<7)|(B<<6)|(C<<5)|(D<<4)|(E<<3)|(F<<2)|(G<<4)|(H))
|
||||
#define _MM_SELECT_FOUR_TWO (A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)
|
||||
#define _MM_SELECT_TWO_TWO (A,B) _MM_SELECT_FOUR_TWO(0,0,A,B)
|
||||
|
||||
namespace Grid {
|
||||
|
||||
typedef float RealF;
|
||||
|
@ -17,7 +17,8 @@
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithms/approx/bigfloat.h>
|
||||
//#include <algorithms/approx/bigfloat.h>
|
||||
#include <algorithms/approx/bigfloat_double.h>
|
||||
|
||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||
|
388
lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
388
lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
@ -0,0 +1,388 @@
|
||||
#ifndef GRID_IRL_H
|
||||
#define GRID_IRL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ImplicitlyRestartedLanczos {
|
||||
public:
|
||||
|
||||
int Niter;
|
||||
int Nk;
|
||||
int Np;
|
||||
RealD enorm;
|
||||
RealD vthr;
|
||||
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
OperatorFunction<Field> &_poly;
|
||||
|
||||
ImplicitlyRestartedLanczos(
|
||||
LinearOperatorBase<Field> &Linop,
|
||||
OperatorFunction<Field> & poly,
|
||||
int _Nk,
|
||||
int _Np,
|
||||
RealD _enorm,
|
||||
RealD _vthrs,
|
||||
int _Niter) :
|
||||
_Linop(Linop),
|
||||
_poly(poly),
|
||||
Nk(_Nk),
|
||||
Np(_Np),
|
||||
enorm(_enorm),
|
||||
vthr(_vthrs)
|
||||
{
|
||||
vthr=_vthrs;
|
||||
Niter=_Niter;
|
||||
};
|
||||
|
||||
|
||||
void step(Vector<RealD>& lmda,
|
||||
Vector<RealD>& lmdb,
|
||||
Vector<Field>& evec,
|
||||
Field& f,int Nm,int k)
|
||||
{
|
||||
|
||||
assert( k< Nm );
|
||||
|
||||
w = opr_->mult(evec[k]);
|
||||
|
||||
if(k==0){ // Initial step
|
||||
|
||||
RealD wnorm= w*w;
|
||||
std::cout<<"wnorm="<<wnorm<<std::endl;
|
||||
|
||||
RealD alph = evec[k] * w;
|
||||
w -= alph * evec[k];
|
||||
lmd[k] = alph;
|
||||
|
||||
RealD beta = w * w;
|
||||
beta = sqrt(beta);
|
||||
RealD betar = 1.0/beta;
|
||||
|
||||
evec[k+1] = betar * w;
|
||||
lme[k] = beta;
|
||||
|
||||
} else { // Iteration step
|
||||
|
||||
w -= lme[k-1] * evec[k-1];
|
||||
|
||||
RealD alph = evec[k] * w;
|
||||
w -= alph * evec[k];
|
||||
|
||||
RealD beta = w * w;
|
||||
beta = sqrt(beta);
|
||||
RealD betar = 1.0/beta;
|
||||
w *= betar;
|
||||
|
||||
lmd[k] = alph;
|
||||
lme[k] = beta;
|
||||
orthogonalize(w,evec,k);
|
||||
|
||||
if(k < Nm-1) evec[k+1] = w;
|
||||
}
|
||||
}
|
||||
|
||||
void qr_decomp(Vector<RealD>& lmda,
|
||||
Vector<RealD>& lmdb,
|
||||
int Nk,
|
||||
int Nm,
|
||||
Vector<RealD>& Qt,
|
||||
RealD Dsft,
|
||||
int kmin,
|
||||
int kmax)
|
||||
{
|
||||
int k = kmin-1;
|
||||
RealD x;
|
||||
|
||||
RealD Fden = 1.0/sqrt((lmd[k]-Dsh)*(lmd[k]-Dsh) +lme[k]*lme[k]);
|
||||
RealD c = ( lmd[k] -Dsh) *Fden;
|
||||
RealD s = -lme[k] *Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
x = -s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt[i+Nm*k ];
|
||||
RealD Qtmp2 = Qt[i+Nm*(k+1)];
|
||||
Qt[i+Nm*k ] = c*Qtmp1 - s*Qtmp2;
|
||||
Qt[i+Nm*(k+1)] = s*Qtmp1 + c*Qtmp2;
|
||||
}
|
||||
|
||||
// Givens transformations
|
||||
for(int k = kmin; k < kmax-1; ++k){
|
||||
RealD Fden = 1.0/sqrt( x*x +lme[k-1]*lme[k-1]);
|
||||
RealD c = lme[k-1]*Fden;
|
||||
RealD s = - x*Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
lme[k-1] = c*lme[k-1] -s*x;
|
||||
|
||||
if(k != kmax-2){
|
||||
x = -s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
}
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt[i+Nm*k ];
|
||||
RealD Qtmp2 = Qt[i+Nm*(k+1)];
|
||||
Qt[i+Nm*k ] = c*Qtmp1 -s*Qtmp2;
|
||||
Qt[i+Nm*(k+1)] = s*Qtmp1 +c*Qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void diagonalize(Vector<RealD>& lmda,
|
||||
Vector<RealD>& lmdb,
|
||||
int Nm2,
|
||||
int Nm,
|
||||
Vector<RealD>& Qt)
|
||||
{
|
||||
int Niter = 100*Nm;
|
||||
int kmin = 1;
|
||||
int kmax = Nk;
|
||||
// (this should be more sophisticated)
|
||||
|
||||
for(int iter=0; iter<Niter; ++iter){
|
||||
|
||||
// determination of 2x2 leading submatrix
|
||||
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
|
||||
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
|
||||
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
|
||||
// (Dsh: shift)
|
||||
|
||||
// transformation
|
||||
qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax);
|
||||
|
||||
// Convergence criterion (redef of kmin and kamx)
|
||||
for(int j=kmax-1; j>= kmin; --j){
|
||||
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
|
||||
if(fabs(lme[j-1])+dds > dds){
|
||||
kmax = j+1;
|
||||
goto continued;
|
||||
}
|
||||
}
|
||||
Niter = iter;
|
||||
return;
|
||||
|
||||
continued:
|
||||
for(int j=0; j<kmax-1; ++j){
|
||||
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
|
||||
if(fabs(lme[j])+dds > dds){
|
||||
kmin = j+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
|
||||
abort();
|
||||
}
|
||||
|
||||
void orthogonalize(Field& w,
|
||||
const Vector<Field>& evec,
|
||||
int k)
|
||||
{
|
||||
// Schmidt orthogonalization
|
||||
|
||||
size_t size = w.size();
|
||||
assert(size%2 ==0);
|
||||
|
||||
std::slice re(0,size/2,2);
|
||||
std::slice im(1,size/2,2);
|
||||
|
||||
for(int j=0; j<k; ++j){
|
||||
RealD prdr = evec[j]*w;
|
||||
RealD prdi = evec[j].im_prod(w);
|
||||
|
||||
valarray<RealD> evr(evec[j][re]);
|
||||
valarray<RealD> evi(evec[j][im]);
|
||||
|
||||
w.add(re, -prdr*evr +prdi*evi);
|
||||
w.add(im, -prdr*evi -prdi*evr);
|
||||
}
|
||||
}
|
||||
|
||||
void calc(Vector<RealD>& lmd,
|
||||
Vector<Field>& evec,
|
||||
const Field& b,
|
||||
int& Nsbt,
|
||||
int& Nconv)
|
||||
{
|
||||
const size_t fsize = evec[0].size();
|
||||
|
||||
Nconv = -1;
|
||||
Nsbt = 0;
|
||||
int Nm = Nk_+Np_;
|
||||
|
||||
std::cout << " -- Nk = " << Nk_ << " Np = "<< Np_ << endl;
|
||||
std::cout << " -- Nm = " << Nm << endl;
|
||||
std::cout << " -- size of lmd = " << lmd.size() << endl;
|
||||
std::cout << " -- size of evec = " << evec.size() << endl;
|
||||
|
||||
assert(Nm < evec.size() && Nm < lmd.size());
|
||||
|
||||
vector<RealD> lme(Nm);
|
||||
vector<RealD> lmd2(Nm);
|
||||
vector<RealD> lme2(Nm);
|
||||
vector<RealD> Qt(Nm*Nm);
|
||||
vector<int> Iconv(Nm);
|
||||
|
||||
vector<Field> B(Nm);
|
||||
for(int k=0; k<Nm; ++k) B[k].resize(fsize);
|
||||
|
||||
Field f(fsize);
|
||||
Field v(fsize);
|
||||
|
||||
int k1 = 1;
|
||||
int k2 = Nk_;
|
||||
int kconv = 0;
|
||||
|
||||
int Kdis = 0;
|
||||
int Kthrs = 0;
|
||||
RealD beta_k;
|
||||
|
||||
// Set initial vector
|
||||
evec[0] = 1.0;
|
||||
RealD vnorm = evec[0]*evec[0];
|
||||
evec[0] = 1.0/sqrt(vnorm);
|
||||
// (uniform vector)
|
||||
|
||||
// Initial Nk steps
|
||||
for(int k=0; k<k2; ++k) step(lmd,lme,evec,f,Nm,k);
|
||||
|
||||
// Restarting loop begins
|
||||
for(int iter = 0; iter<Niter_; ++iter){
|
||||
std::cout<<"\n iteration = "<< iter << endl;
|
||||
|
||||
int Nm2 = Nm - kconv;
|
||||
for(int k=k2; k<Nm; ++k) step(lmd,lme,evec,f,Nm,k);
|
||||
f *= lme[Nm-1];
|
||||
|
||||
// getting eigenvalues
|
||||
for(int k=0; k<Nm2; ++k){
|
||||
lmd2[k] = lmd[k+k1-1];
|
||||
lme2[k] = lme[k+k1-1];
|
||||
}
|
||||
setUnit_Qt(Nm,Qt);
|
||||
diagonalize(lmd2,lme2,Nm2,Nm,Qt);
|
||||
// sorting
|
||||
sort_->push(lmd2,Nm);
|
||||
|
||||
// Implicitly shifted QR transformations
|
||||
setUnit_Qt(Nm,Qt);
|
||||
for(int ip=k2; ip<Nm; ++ip)
|
||||
qr_decomp(lmd,lme,Nm,Nm,Qt,lmd2[ip],k1,Nm);
|
||||
|
||||
for(int i=0; i<(Nk_+1); ++i) B[i] = 0.0;
|
||||
|
||||
for(int j=k1-1; j<k2+1; ++j){
|
||||
for(int k=0; k<Nm; ++k){
|
||||
B[j] += Qt[k+Nm*j] * evec[k];
|
||||
}
|
||||
}
|
||||
for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
|
||||
|
||||
// Compressed vector f and beta(k2)
|
||||
f *= Qt[Nm-1+Nm*(k2-1)];
|
||||
f += lme[k2-1] * evec[k2];
|
||||
beta_k = f * f;
|
||||
beta_k = sqrt(beta_k);
|
||||
std::cout<<" beta(k) = "<<beta_k<<endl;
|
||||
|
||||
RealD betar = 1.0/beta_k;
|
||||
evec[k2] = betar * f;
|
||||
lme[k2-1] = beta_k;
|
||||
|
||||
// Convergence test
|
||||
for(int k=0; k<Nm2; ++k){
|
||||
lmd2[k] = lmd[k];
|
||||
lme2[k] = lme[k];
|
||||
}
|
||||
setUnit_Qt(Nm,Qt);
|
||||
diagonalize(lmd2,lme2,Nk_,Nm,Qt);
|
||||
|
||||
for(int k = 0; k<Nk_; ++k) B[k]=0.0;
|
||||
|
||||
for(int j = 0; j<Nk_; ++j){
|
||||
for(int k = 0; k<Nk_; ++k){
|
||||
B[j] += Qt[k+j*Nm] * evec[k];
|
||||
}
|
||||
}
|
||||
Kdis = 0;
|
||||
Kthrs = 0;
|
||||
|
||||
std::cout << setiosflags(ios_base::scientific);
|
||||
for(int i=0; i<Nk_; ++i){
|
||||
v = opr_->mult(B[i]);
|
||||
//std::cout<<"vv="<<v*v<<std::endl;
|
||||
|
||||
RealD vnum = B[i]*v;
|
||||
RealD vden = B[i]*B[i];
|
||||
lmd2[i] = vnum/vden;
|
||||
v -= lmd2[i]*B[i];
|
||||
RealD vv = v*v;
|
||||
|
||||
std::cout << " [" << setw(3)<< setiosflags(ios_base::right) <<i<<"] ";
|
||||
std::cout << setw(25)<< setiosflags(ios_base::left)<< lmd2[i];
|
||||
std::cout <<" "<< setw(25)<< setiosflags(ios_base::right)<< vv<< endl;
|
||||
|
||||
if(vv<enorm_){
|
||||
Iconv[Kdis] = i;
|
||||
++Kdis;
|
||||
if(sort_->saturated(lmd2[i],vthr)) ++Kthrs;
|
||||
std::cout<<"Kthrs="<<Kthrs<<endl;
|
||||
}
|
||||
} // i-loop end
|
||||
std::cout << resetiosflags(ios_base::scientific);
|
||||
|
||||
|
||||
std::cout<<" #modes converged: "<<Kdis<<endl;
|
||||
|
||||
if(Kthrs > 0){
|
||||
// (there is a converged eigenvalue larger than Vthrs.)
|
||||
Nconv = iter;
|
||||
goto converged;
|
||||
}
|
||||
} // end of iter loop
|
||||
|
||||
std::cout<<"\n NOT converged.\n";
|
||||
abort();
|
||||
|
||||
converged:
|
||||
// Sorting
|
||||
|
||||
lmd.clear();
|
||||
evec.clear();
|
||||
for(int i=0; i<Kdis; ++i){
|
||||
lmd.push_back(lmd2[Iconv[i]]);
|
||||
evec.push_back(B[Iconv[i]]);
|
||||
}
|
||||
sort_->push(lmd,evec,Kdis);
|
||||
Nsbt = Kdis - Kthrs;
|
||||
|
||||
std::cout << "\n Converged\n Summary :\n";
|
||||
std::cout << " -- Iterations = "<< Nconv << "\n";
|
||||
std::cout << " -- beta(k) = "<< beta_k << "\n";
|
||||
std::cout << " -- Kdis = "<< Kdis << "\n";
|
||||
std::cout << " -- Nsbt = "<< Nsbt << "\n";
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
48
lib/algorithms/iterative/MatrixUtils.h
Normal file
48
lib/algorithms/iterative/MatrixUtils.h
Normal file
@ -0,0 +1,48 @@
|
||||
#ifndef GRID_MATRIX_UTILS_H
|
||||
#define GRID_MATRIX_UTILS_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
namespace MatrixUtils {
|
||||
|
||||
template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
|
||||
N=A.size(); assert(N>0);
|
||||
M=A[0].size();
|
||||
for(int i=0;i<N;i++){
|
||||
assert(A[i].size()==M);
|
||||
}
|
||||
}
|
||||
|
||||
template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
|
||||
{
|
||||
int M;
|
||||
Size(A,N,M);
|
||||
assert(N==M);
|
||||
}
|
||||
|
||||
template<class T> inline void Fill(Matrix<T>& A,T & val)
|
||||
{
|
||||
int N,M;
|
||||
Size(A,N,M);
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<M;j++){
|
||||
A[i][j]=val;
|
||||
}}
|
||||
}
|
||||
template<class T> inline void Diagonal(Matrix<T>& A,T & val)
|
||||
{
|
||||
int N;
|
||||
SizeSquare(A,N);
|
||||
for(int i=0;i<N;i++){
|
||||
A[i][i]=val;
|
||||
}
|
||||
}
|
||||
template<class T> inline void Identity(Matrix<T>& A)
|
||||
{
|
||||
Fill(A,0.0);
|
||||
Diagonal(A,1.0);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
@ -87,6 +87,14 @@ class CartesianCommunicator {
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void RecvFrom(void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
void SendTo(void *xmit,
|
||||
int xmit_to_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
|
@ -81,13 +81,30 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||
SendToRecvFromComplete(reqs);
|
||||
}
|
||||
void CartesianCommunicator::RecvFrom(void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
MPI_Status stat;
|
||||
int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::SendTo(void *xmit,
|
||||
int dest,
|
||||
int bytes)
|
||||
{
|
||||
int rank = _processor; // used for tag; must know who it comes from
|
||||
int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
// Basic Halo comms primitive
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
@ -100,7 +117,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
|
||||
list.push_back(xrq);
|
||||
list.push_back(rrq);
|
||||
|
||||
}
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
|
@ -22,6 +22,20 @@ void CartesianCommunicator::GlobalSum(double &){}
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
||||
|
||||
void CartesianCommunicator::RecvFrom(void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
void CartesianCommunicator::SendTo(void *xmit,
|
||||
int xmit_to_rank,
|
||||
int bytes)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
// Basic Halo comms primitive -- should never call in single node
|
||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
int dest,
|
||||
|
@ -178,6 +178,7 @@ GridUnopClass(UnaryConj,conjugate(a));
|
||||
GridUnopClass(UnaryTrace,trace(a));
|
||||
GridUnopClass(UnaryTranspose,transpose(a));
|
||||
GridUnopClass(UnaryTa,Ta(a));
|
||||
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
|
||||
GridUnopClass(UnaryReal,real(a));
|
||||
GridUnopClass(UnaryImag,imag(a));
|
||||
GridUnopClass(UnaryToReal,toReal(a));
|
||||
@ -290,6 +291,7 @@ GRID_DEF_UNOP(conjugate,UnaryConj);
|
||||
GRID_DEF_UNOP(trace,UnaryTrace);
|
||||
GRID_DEF_UNOP(transpose,UnaryTranspose);
|
||||
GRID_DEF_UNOP(Ta,UnaryTa);
|
||||
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
|
||||
GRID_DEF_UNOP(real,UnaryReal);
|
||||
GRID_DEF_UNOP(imag,UnaryImag);
|
||||
GRID_DEF_UNOP(toReal,UnaryToReal);
|
||||
|
@ -29,6 +29,9 @@ extern int GridCshiftPermuteMap[4][16];
|
||||
class LatticeBase {};
|
||||
class LatticeExpressionBase {};
|
||||
|
||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >; // Aligned allocator??
|
||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
|
||||
|
||||
template <typename Op, typename T1>
|
||||
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
||||
public:
|
||||
@ -59,7 +62,7 @@ public:
|
||||
|
||||
GridBase *_grid;
|
||||
int checkerboard;
|
||||
std::vector<vobj,alignedAllocator<vobj> > _odata;
|
||||
Vector<vobj> _odata;
|
||||
|
||||
// to pthread need a computable loop where loop induction is not required
|
||||
int begin(void) { return 0;};
|
||||
|
513
lib/parallelIO/BinaryIO.h
Normal file
513
lib/parallelIO/BinaryIO.h
Normal file
@ -0,0 +1,513 @@
|
||||
#ifndef GRID_BINARY_IO_H
|
||||
#define GRID_BINARY_IO_H
|
||||
|
||||
#ifdef HAVE_ENDIAN_H
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <algorithm>
|
||||
// 64bit endian swap is a portability pain
|
||||
#ifndef __has_builtin // Optional of course.
|
||||
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
|
||||
#endif
|
||||
|
||||
#if HAVE_DECL_BE64TOH
|
||||
#undef Grid_ntohll
|
||||
#define Grid_ntohll be64toh
|
||||
#endif
|
||||
|
||||
#if HAVE_DECL_NTOHLL
|
||||
#undef Grid_ntohll
|
||||
#define Grid_ntohll ntohll
|
||||
#endif
|
||||
|
||||
#ifndef Grid_ntohll
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
|
||||
#define Grid_ntohll(A) (A)
|
||||
|
||||
#else
|
||||
|
||||
#if __has_builtin(__builtin_bswap64)
|
||||
#define Grid_ntohll(A) __builtin_bswap64(A)
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// A little helper
|
||||
inline void removeWhitespace(std::string &key)
|
||||
{
|
||||
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
|
||||
}
|
||||
|
||||
class BinaryIO {
|
||||
|
||||
public:
|
||||
|
||||
|
||||
// Network is big endian
|
||||
static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
|
||||
static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
|
||||
static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
|
||||
static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}
|
||||
|
||||
static inline void be32toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint32_t * f = (uint32_t *)file_object;
|
||||
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
||||
f[i] = ntohl(f[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// LE must Swap and switch to host
|
||||
static inline void le32toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint32_t *fp = (uint32_t *)file_object;
|
||||
uint32_t f;
|
||||
|
||||
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = ntohl(f);
|
||||
}
|
||||
}
|
||||
|
||||
// BE is same as network
|
||||
static inline void be64toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint64_t * f = (uint64_t *)file_object;
|
||||
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
||||
f[i] = Grid_ntohll(f[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// LE must swap and switch;
|
||||
static inline void le64toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint64_t *fp = (uint64_t *)file_object;
|
||||
uint64_t f,g;
|
||||
|
||||
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
g = g << 32;
|
||||
f = f >> 32;
|
||||
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = Grid_ntohll(g);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class fobj,class munger> static inline void Uint32Checksum(Lattice<vobj> lat,munger munge,uint32_t &csum)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase *grid = lat._grid ;
|
||||
std::cout <<GridLogMessage<< "Uint32Checksum "<<norm2(lat)<<std::endl;
|
||||
sobj siteObj;
|
||||
fobj fileObj;
|
||||
|
||||
csum = 0;
|
||||
std::vector<int> lcoor;
|
||||
for(int l=0;l<grid->lSites();l++){
|
||||
grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
|
||||
peekLocalSite(siteObj,lat,lcoor);
|
||||
munge(siteObj,fileObj,csum);
|
||||
}
|
||||
grid->GlobalSum(csum);
|
||||
}
|
||||
|
||||
static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
|
||||
{
|
||||
for(int i=0;i*sizeof(uint32_t)<buf_size_bytes;i++){
|
||||
csum=csum+buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl;
|
||||
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
// Find the location of each site and send to primary node
|
||||
// Take loop order from Chroma; defines loop order now that NERSC doc no longer
|
||||
// available (how short sighted is that?)
|
||||
std::ifstream fin(file,std::ios::binary|std::ios::in);
|
||||
fin.seekg(offset);
|
||||
|
||||
Umu = zero;
|
||||
uint32_t csum=0;
|
||||
fobj file_object;
|
||||
sobj munged;
|
||||
|
||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
||||
|
||||
std::vector<int> site({x,y,z,t});
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
fin.read((char *)&file_object,sizeof(file_object));
|
||||
|
||||
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
||||
|
||||
munge(file_object,munged,csum);
|
||||
}
|
||||
// The boss who read the file has their value poked
|
||||
pokeSite(munged,Umu,site);
|
||||
}}}}
|
||||
return csum;
|
||||
}
|
||||
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Serialise through node zero
|
||||
//////////////////////////////////////////////////
|
||||
std::cout<< GridLogMessage<< "Serial write I/O "<< file<<std::endl;
|
||||
|
||||
std::ofstream fout;
|
||||
if ( grid->IsBoss() ) {
|
||||
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||
fout.seekp(offset);
|
||||
}
|
||||
|
||||
uint32_t csum=0;
|
||||
fobj file_object;
|
||||
sobj unmunged;
|
||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
||||
|
||||
std::vector<int> site({x,y,z,t});
|
||||
// peek & write
|
||||
peekSite(unmunged,Umu,site);
|
||||
|
||||
munge(unmunged,file_object,csum);
|
||||
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
|
||||
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
||||
|
||||
fout.write((char *)&file_object,sizeof(file_object));
|
||||
}
|
||||
}}}}
|
||||
|
||||
return csum;
|
||||
}
|
||||
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
|
||||
// Take into account block size of parallel file systems want about
|
||||
// 4-16MB chunks.
|
||||
// Ideally one reader/writer per xy plane and read these contiguously
|
||||
// with comms from nominated I/O nodes.
|
||||
std::ifstream fin;
|
||||
|
||||
int nd = grid->_ndimension;
|
||||
std::vector<int> parallel(nd,1);
|
||||
std::vector<int> ioproc (nd);
|
||||
std::vector<int> start(nd);
|
||||
std::vector<int> range(nd);
|
||||
|
||||
for(int d=0;d<nd;d++){
|
||||
assert(grid->CheckerBoarded(d) == 0);
|
||||
}
|
||||
|
||||
uint64_t slice_vol = 1;
|
||||
|
||||
int IOnode = 1;
|
||||
for(int d=0;d<grid->_ndimension;d++) {
|
||||
|
||||
if ( d==0 ) parallel[d] = 0;
|
||||
if (parallel[d]) {
|
||||
range[d] = grid->_ldimensions[d];
|
||||
start[d] = grid->_processor_coor[d]*range[d];
|
||||
ioproc[d]= grid->_processor_coor[d];
|
||||
} else {
|
||||
range[d] = grid->_gdimensions[d];
|
||||
start[d] = 0;
|
||||
ioproc[d]= 0;
|
||||
|
||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||
}
|
||||
slice_vol = slice_vol * range[d];
|
||||
}
|
||||
|
||||
{
|
||||
uint32_t tmp = IOnode;
|
||||
grid->GlobalSum(tmp);
|
||||
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||
for(int d=0;d<grid->_ndimension;d++){
|
||||
std::cout<< range[d];
|
||||
if( d< grid->_ndimension-1 )
|
||||
std::cout<< " x ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
int myrank = grid->ThisRank();
|
||||
int iorank = grid->RankFromProcessorCoor(ioproc);
|
||||
|
||||
if ( IOnode ) {
|
||||
fin.open(file,std::ios::binary|std::ios::in);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Find the location of each site and send to primary node
|
||||
// Take loop order from Chroma; defines loop order now that NERSC doc no longer
|
||||
// available (how short sighted is that?)
|
||||
//////////////////////////////////////////////////////////
|
||||
Umu = zero;
|
||||
uint32_t csum=0;
|
||||
fobj fileObj;
|
||||
sobj siteObj;
|
||||
|
||||
// need to implement these loops in Nd independent way with a lexico conversion
|
||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||
|
||||
std::vector<int> tsite(nd); // temporary mixed up site
|
||||
std::vector<int> gsite(nd);
|
||||
std::vector<int> lsite(nd);
|
||||
std::vector<int> iosite(nd);
|
||||
|
||||
grid->CoorFromIndex(tsite,tlex,range);
|
||||
|
||||
for(int d=0;d<nd;d++){
|
||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||
gsite[d] = tsite[d]+start[d]; // global site
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Get the rank of owner of data
|
||||
/////////////////////////
|
||||
int rank, o_idx,i_idx, g_idx;
|
||||
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
|
||||
grid->GlobalCoorToGlobalIndex(gsite,g_idx);
|
||||
|
||||
////////////////////////////////
|
||||
// iorank reads from the seek
|
||||
////////////////////////////////
|
||||
if (myrank == iorank) {
|
||||
|
||||
fin.seekg(offset+g_idx*sizeof(fileObj));
|
||||
fin.read((char *)&fileObj,sizeof(fileObj));
|
||||
|
||||
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||
|
||||
munge(fileObj,siteObj,csum);
|
||||
|
||||
if ( rank != myrank ) {
|
||||
grid->SendTo((void *)&siteObj,rank,sizeof(siteObj));
|
||||
} else {
|
||||
pokeLocalSite(siteObj,Umu,lsite);
|
||||
}
|
||||
|
||||
} else {
|
||||
if ( myrank == rank ) {
|
||||
grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj));
|
||||
pokeLocalSite(siteObj,Umu,lsite);
|
||||
}
|
||||
}
|
||||
grid->Barrier(); // necessary?
|
||||
}
|
||||
|
||||
grid->GlobalSum(csum);
|
||||
|
||||
return csum;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Parallel writer
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string & format)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
int nd = grid->_ndimension;
|
||||
for(int d=0;d<nd;d++){
|
||||
assert(grid->CheckerBoarded(d) == 0);
|
||||
}
|
||||
|
||||
std::vector<int> parallel(nd,1);
|
||||
std::vector<int> ioproc (nd);
|
||||
std::vector<int> start(nd);
|
||||
std::vector<int> range(nd);
|
||||
|
||||
uint64_t slice_vol = 1;
|
||||
|
||||
int IOnode = 1;
|
||||
|
||||
for(int d=0;d<grid->_ndimension;d++) {
|
||||
|
||||
if ( d==0 ) parallel[d] = 0;
|
||||
|
||||
if (parallel[d]) {
|
||||
range[d] = grid->_ldimensions[d];
|
||||
start[d] = grid->_processor_coor[d]*range[d];
|
||||
ioproc[d]= grid->_processor_coor[d];
|
||||
} else {
|
||||
range[d] = grid->_gdimensions[d];
|
||||
start[d] = 0;
|
||||
ioproc[d]= 0;
|
||||
|
||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||
}
|
||||
|
||||
slice_vol = slice_vol * range[d];
|
||||
}
|
||||
|
||||
{
|
||||
uint32_t tmp = IOnode;
|
||||
grid->GlobalSum(tmp);
|
||||
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||
for(int d=0;d<grid->_ndimension;d++){
|
||||
std::cout<< range[d];
|
||||
if( d< grid->_ndimension-1 )
|
||||
std::cout<< " x ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
int myrank = grid->ThisRank();
|
||||
int iorank = grid->RankFromProcessorCoor(ioproc);
|
||||
|
||||
// Take into account block size of parallel file systems want about
|
||||
// 4-16MB chunks.
|
||||
// Ideally one reader/writer per xy plane and read these contiguously
|
||||
// with comms from nominated I/O nodes.
|
||||
std::ofstream fout;
|
||||
if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out);
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Find the location of each site and send to primary node
|
||||
// Take loop order from Chroma; defines loop order now that NERSC doc no longer
|
||||
// available (how short sighted is that?)
|
||||
//////////////////////////////////////////////////////////
|
||||
|
||||
uint32_t csum=0;
|
||||
fobj fileObj;
|
||||
sobj siteObj;
|
||||
|
||||
|
||||
// need to implement these loops in Nd independent way with a lexico conversion
|
||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||
|
||||
std::vector<int> tsite(nd); // temporary mixed up site
|
||||
std::vector<int> gsite(nd);
|
||||
std::vector<int> lsite(nd);
|
||||
std::vector<int> iosite(nd);
|
||||
|
||||
grid->CoorFromIndex(tsite,tlex,range);
|
||||
|
||||
for(int d=0;d<nd;d++){
|
||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||
gsite[d] = tsite[d]+start[d]; // global site
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////
|
||||
// Get the rank of owner of data
|
||||
/////////////////////////
|
||||
int rank, o_idx,i_idx, g_idx;
|
||||
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
|
||||
grid->GlobalCoorToGlobalIndex(gsite,g_idx);
|
||||
|
||||
////////////////////////////////
|
||||
// iorank writes from the seek
|
||||
////////////////////////////////
|
||||
if (myrank == iorank) {
|
||||
|
||||
if ( rank != myrank ) {
|
||||
grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj));
|
||||
} else {
|
||||
peekLocalSite(siteObj,Umu,lsite);
|
||||
}
|
||||
|
||||
munge(siteObj,fileObj,csum);
|
||||
|
||||
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
||||
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
||||
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
||||
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
||||
|
||||
fout.seekp(offset+g_idx*sizeof(fileObj));
|
||||
fout.write((char *)&fileObj,sizeof(fileObj));
|
||||
|
||||
} else {
|
||||
if ( myrank == rank ) {
|
||||
peekLocalSite(siteObj,Umu,lsite);
|
||||
grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj));
|
||||
}
|
||||
}
|
||||
grid->Barrier(); // necessary// or every 16 packets to rate throttle??
|
||||
}
|
||||
|
||||
grid->GlobalSum(csum);
|
||||
|
||||
return csum;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -7,57 +7,23 @@
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
|
||||
#ifdef HAVE_ENDIAN_H
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include <arpa/inet.h>
|
||||
|
||||
// 64bit endian swap is a portability pain
|
||||
#ifndef __has_builtin // Optional of course.
|
||||
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
|
||||
#endif
|
||||
|
||||
#if HAVE_DECL_BE64TOH
|
||||
#undef Grid_ntohll
|
||||
#define Grid_ntohll be64toh
|
||||
#endif
|
||||
|
||||
#if HAVE_DECL_NTOHLL
|
||||
#undef Grid_ntohll
|
||||
#define Grid_ntohll ntohll
|
||||
#endif
|
||||
|
||||
#ifndef Grid_ntohll
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
|
||||
#define Grid_ntohll(A) (A)
|
||||
|
||||
#else
|
||||
|
||||
#if __has_builtin(__builtin_bswap64)
|
||||
#define Grid_ntohll(A) __builtin_bswap64(A)
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <pwd.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
using namespace QCD;
|
||||
using namespace Grid;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Some data types for intermediate storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
|
||||
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
||||
|
||||
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// header specification/interpretation
|
||||
@ -86,50 +52,173 @@ class NerscField {
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Bit and Physical Checksumming and QA of data
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline void NerscGrid(GridBase *grid,NerscField &header)
|
||||
{
|
||||
assert(grid->_ndimension==4);
|
||||
for(int d=0;d<4;d++) {
|
||||
header.dimension[d] = grid->_fdimensions[d];
|
||||
}
|
||||
for(int d=0;d<4;d++) {
|
||||
header.boundary[d] = std::string("PERIODIC");
|
||||
}
|
||||
}
|
||||
template<class GaugeField>
|
||||
inline void NerscStatistics(GaugeField & data,NerscField &header)
|
||||
{
|
||||
header.link_trace=Grid::QCD::WilsonLoops<GaugeField>::linkTrace(data);
|
||||
header.plaquette =Grid::QCD::WilsonLoops<GaugeField>::avgPlaquette(data);
|
||||
}
|
||||
|
||||
inline void NerscMachineCharacteristics(NerscField &header)
|
||||
{
|
||||
// Who
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
if (pw) header.creator = std::string(pw->pw_name);
|
||||
|
||||
// When
|
||||
std::time_t t = std::time(nullptr);
|
||||
std::tm tm = *std::localtime(&t);
|
||||
std::ostringstream oss;
|
||||
// oss << std::put_time(&tm, "%c %Z");
|
||||
header.creation_date = oss.str();
|
||||
header.archive_date = header.creation_date;
|
||||
|
||||
// What
|
||||
struct utsname name; uname(&name);
|
||||
header.creator_hardware = std::string(name.nodename)+"-";
|
||||
header.creator_hardware+= std::string(name.machine)+"-";
|
||||
header.creator_hardware+= std::string(name.sysname)+"-";
|
||||
header.creator_hardware+= std::string(name.release);
|
||||
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Utilities ; these are QCD aware
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
|
||||
{
|
||||
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
|
||||
}
|
||||
inline void reconstruct3(LorentzColourMatrix & cm)
|
||||
{
|
||||
const int x=0;
|
||||
const int y=1;
|
||||
const int z=2;
|
||||
for(int mu=0;mu<4;mu++){
|
||||
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
|
||||
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
|
||||
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
|
||||
}
|
||||
}
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct NerscSimpleMunger{
|
||||
|
||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
for(int i=0;i<3;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)()(i,j) = in(mu)()(i,j);
|
||||
}}}
|
||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct NerscSimpleUnmunger{
|
||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int i=0;i<Nc;i++){
|
||||
for(int j=0;j<Nc;j++){
|
||||
out(mu)()(i,j) = in(mu)()(i,j);
|
||||
}}}
|
||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Nersc3x2munger{
|
||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||
|
||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)()(i,j) = in(mu)(i)(j);
|
||||
}}
|
||||
}
|
||||
reconstruct3(out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Nersc3x2unmunger{
|
||||
|
||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)(i)(j) = in(mu)()(i,j);
|
||||
}}
|
||||
}
|
||||
|
||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Write and read from fstream; comput header offset for payload
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline unsigned int writeNerscHeader(NerscField &field,std::string file)
|
||||
{
|
||||
std::ofstream fout(file,std::ios::out);
|
||||
class NerscIO : public BinaryIO {
|
||||
public:
|
||||
|
||||
static inline unsigned int writeHeader(NerscField &field,std::string file)
|
||||
{
|
||||
std::ofstream fout(file,std::ios::out);
|
||||
|
||||
fout.seekp(0,std::ios::beg);
|
||||
fout << "BEGIN_HEADER" << std::endl;
|
||||
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
|
||||
fout << "DATATYPE = " << field.data_type << std::endl;
|
||||
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
|
||||
fout.seekp(0,std::ios::beg);
|
||||
fout << "BEGIN_HEADER" << std::endl;
|
||||
fout << "HDR_VERSION = " << field.hdr_version << std::endl;
|
||||
fout << "DATATYPE = " << field.data_type << std::endl;
|
||||
fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
|
||||
|
||||
for(int i=0;i<4;i++){
|
||||
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
|
||||
}
|
||||
// just to keep the space and write it later
|
||||
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
|
||||
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
|
||||
for(int i=0;i<4;i++){
|
||||
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
|
||||
}
|
||||
fout << "CHECKSUM = "<< std::hex << std::setw(16) << 0 << field.checksum << std::endl;
|
||||
for(int i=0;i<4;i++){
|
||||
fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
|
||||
}
|
||||
// just to keep the space and write it later
|
||||
fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
|
||||
fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;
|
||||
for(int i=0;i<4;i++){
|
||||
fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
|
||||
}
|
||||
|
||||
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
|
||||
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
|
||||
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
|
||||
fout << "CREATOR = " << field.creator << std::endl;
|
||||
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
|
||||
fout << "CREATION_DATE = " << field.creation_date << std::endl;
|
||||
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
|
||||
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
|
||||
fout << "END_HEADER" << std::endl;
|
||||
field.data_start = fout.tellp();
|
||||
return field.data_start;
|
||||
fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::endl;
|
||||
fout << std::dec;
|
||||
|
||||
fout << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;
|
||||
fout << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;
|
||||
fout << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;
|
||||
fout << "CREATOR = " << field.creator << std::endl;
|
||||
fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
|
||||
fout << "CREATION_DATE = " << field.creation_date << std::endl;
|
||||
fout << "ARCHIVE_DATE = " << field.archive_date << std::endl;
|
||||
fout << "FLOATING_POINT = " << field.floating_point << std::endl;
|
||||
fout << "END_HEADER" << std::endl;
|
||||
field.data_start = fout.tellp();
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
// A little helper
|
||||
inline void removeWhitespace(std::string &key)
|
||||
{
|
||||
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
|
||||
}
|
||||
// for the header-reader
|
||||
inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
||||
static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
|
||||
{
|
||||
int offset=0;
|
||||
std::map<std::string,std::string> header;
|
||||
@ -163,7 +252,6 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
||||
//////////////////////////////////////////////////
|
||||
// chomp the values
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
field.hdr_version = header["HDR_VERSION"];
|
||||
field.data_type = header["DATATYPE"];
|
||||
field.storage_format = header["STORAGE_FORMAT"];
|
||||
@ -200,314 +288,21 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Utilities
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void reconstruct3(LorentzColourMatrix & cm)
|
||||
{
|
||||
const int x=0;
|
||||
const int y=1;
|
||||
const int z=2;
|
||||
for(int mu=0;mu<4;mu++){
|
||||
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
|
||||
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
|
||||
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void inline be32toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint32_t * f = (uint32_t *)file_object;
|
||||
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
||||
f[i] = ntohl(f[i]);
|
||||
}
|
||||
}
|
||||
void inline le32toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint32_t *fp = (uint32_t *)file_object;
|
||||
|
||||
uint32_t f;
|
||||
|
||||
for(int i=0;i*sizeof(uint32_t)<bytes;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = ntohl(f);
|
||||
}
|
||||
}
|
||||
void inline be64toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint64_t * f = (uint64_t *)file_object;
|
||||
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
||||
f[i] = Grid_ntohll(f[i]);
|
||||
}
|
||||
}
|
||||
void inline le64toh_v(void *file_object,uint32_t bytes)
|
||||
{
|
||||
uint64_t *fp = (uint64_t *)file_object;
|
||||
uint64_t f,g;
|
||||
|
||||
for(int i=0;i*sizeof(uint64_t)<bytes;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
g = g << 32;
|
||||
f = f >> 32;
|
||||
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = ntohl(g);
|
||||
}
|
||||
}
|
||||
|
||||
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size,uint32_t &csum)
|
||||
{
|
||||
for(int i=0;i*sizeof(uint32_t)<buf_size;i++){
|
||||
csum=csum+buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct NerscSimpleMunger{
|
||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
for(int i=0;i<3;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)()(i,j) = in(mu)()(i,j);
|
||||
}}}
|
||||
|
||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||
};
|
||||
};
|
||||
template<class fobj,class sobj>
|
||||
struct NerscSimpleUnmunger{
|
||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||
for(int mu=0;mu<4;mu++){
|
||||
for(int i=0;i<3;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)()(i,j) = in(mu)()(i,j);
|
||||
}}}
|
||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Nersc3x2munger{
|
||||
void operator() (fobj &in,sobj &out,uint32_t &csum){
|
||||
|
||||
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)()(i,j) = in(mu)(i)(j);
|
||||
}}
|
||||
}
|
||||
reconstruct3(out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Nersc3x2unmunger{
|
||||
|
||||
void operator() (sobj &in,fobj &out,uint32_t &csum){
|
||||
|
||||
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)(i)(j) = in(mu)()(i,j);
|
||||
}}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Template wizardry to map types to strings for NERSC in an extensible way
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> struct NerscDataType {
|
||||
static void DataType (std::string &str) { str = std::string("4D_BINARY_UNKNOWN"); };
|
||||
static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG"); };
|
||||
};
|
||||
|
||||
template<> struct NerscDataType<iColourMatrix<ComplexD> > {
|
||||
static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
|
||||
static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG");};
|
||||
};
|
||||
|
||||
template<> struct NerscDataType<iColourMatrix<ComplexF> > {
|
||||
static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); };
|
||||
static void FloatingPoint(std::string &str) { str = std::string("IEEE32BIG");};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Bit and Physical Checksumming and QA of data
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
template<class vobj> inline uint32_t NerscChecksum(Lattice<vobj> & data)
|
||||
{
|
||||
uint32_t sum;
|
||||
for(int ss=0;ss<data._grid->Osites();ss++){
|
||||
uint32_t *iptr = (uint32_t *)& data._odata[0] ;
|
||||
for(int i=0;i<sizeof(vobj);i+=sizeof(uint32_t)){
|
||||
sum=sum+iptr[i];
|
||||
}
|
||||
}
|
||||
data._grid->globalSum(sum);
|
||||
return sum;
|
||||
}
|
||||
*/
|
||||
template<class vobj> inline void NerscPhysicalCharacteristics(Lattice<vobj> & data,NerscField &header)
|
||||
{
|
||||
header.data_type = NerscDataType<vobj>::DataType;
|
||||
header.floating_point = NerscDataType<vobj>::FloatingPoint;
|
||||
return;
|
||||
}
|
||||
|
||||
template<> inline void NerscPhysicalCharacteristics(LatticeGaugeField & data,NerscField &header)
|
||||
{
|
||||
NerscDataType<decltype(data._odata[0])>::DataType(header.data_type);
|
||||
NerscDataType<decltype(data._odata[0])>::FloatingPoint(header.floating_point);
|
||||
header.link_trace=1.0;
|
||||
header.plaquette =1.0;
|
||||
}
|
||||
|
||||
template<class vobj> inline void NerscStatisics(Lattice<vobj> & data,NerscField &header)
|
||||
{
|
||||
assert(data._grid->_ndimension==4);
|
||||
|
||||
for(int d=0;d<4;d++)
|
||||
header.dimension[d] = data._grid->_fdimensions[d];
|
||||
|
||||
// compute checksum and any physical properties contained for this type
|
||||
// header.checksum = NerscChecksum(data);
|
||||
|
||||
NerscPhysicalCharacteristics(data,header);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Now the meat: the object readers
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj,class fobj,class munger>
|
||||
inline void readNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,std::string &format)
|
||||
|
||||
template<class vsimd>
|
||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
|
||||
{
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
int offset = readHeader(file,Umu._grid,header);
|
||||
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
// Find the location of each site and send to primary node
|
||||
// for(int site=0; site < Layout::vol(); ++site){
|
||||
// multi1d<int> coord = crtesn(site, Layout::lattSize());
|
||||
// for(int dd=0; dd<Nd; dd++){ /* dir */
|
||||
// cfg_in.readArray(su3_buffer, float_size, mat_size);
|
||||
//
|
||||
// Above from Chroma; defines loop order now that NERSC doc no longer
|
||||
// available (how short sighted is that?)
|
||||
{
|
||||
std::ifstream fin(file,std::ios::binary|std::ios::in);
|
||||
fin.seekg(offset);
|
||||
|
||||
Umu = zero;
|
||||
uint32_t csum=0;
|
||||
fobj file_object;
|
||||
sobj munged;
|
||||
|
||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
||||
|
||||
std::vector<int> site({x,y,z,t});
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
fin.read((char *)&file_object,sizeof(file_object));
|
||||
|
||||
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
||||
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
||||
|
||||
munge(file_object,munged,csum);
|
||||
}
|
||||
// The boss who read the file has their value poked
|
||||
pokeSite(munged,Umu,site);
|
||||
}}}}
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class sobj,class fobj,class munger>
|
||||
inline void writeNerscObject(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
|
||||
int sequence,double lt,double pl)
|
||||
{
|
||||
GridBase *grid = Umu._grid;
|
||||
NerscField header;
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// First write the header; this is in wrong place
|
||||
//////////////////////////////////////////////////
|
||||
assert(grid->_ndimension == 4);
|
||||
for(int d=0;d<4;d++){
|
||||
header.dimension[d]=grid->_fdimensions[d];
|
||||
header.boundary [d]=std::string("PERIODIC");;
|
||||
}
|
||||
header.hdr_version=std::string("WHATDAHECK");
|
||||
// header.storage_format=storage_format<vobj>::string; // use template specialisation
|
||||
// header.data_type=data_type<vobj>::string;
|
||||
header.storage_format=std::string("debug");
|
||||
header.data_type =std::string("debug");
|
||||
|
||||
//FIXME; use template specialisation to fill these out
|
||||
header.link_trace =lt;
|
||||
header.plaquette =pl;
|
||||
header.checksum =0;
|
||||
|
||||
//
|
||||
header.sequence_number =sequence;
|
||||
header.ensemble_id =std::string("UKQCD");
|
||||
header.ensemble_label =std::string("UKQCD");
|
||||
header.creator =std::string("Tadahito");
|
||||
header.creator_hardware=std::string("BlueGene/Q");
|
||||
header.creation_date =std::string("AnnoDomini");
|
||||
header.archive_date =std::string("AnnoDomini");
|
||||
header.floating_point =std::string("IEEE64BIG");
|
||||
// header.data_start=;
|
||||
// unsigned int checksum;
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Now write the body
|
||||
//////////////////////////////////////////////////
|
||||
{
|
||||
std::ofstream fout(file,std::ios::binary|std::ios::out);
|
||||
fout.seekp(offset);
|
||||
|
||||
Umu = zero;
|
||||
uint32_t csum=0;
|
||||
fobj file_object;
|
||||
sobj unmunged;
|
||||
for(int t=0;t<grid->_fdimensions[3];t++){
|
||||
for(int z=0;z<grid->_fdimensions[2];z++){
|
||||
for(int y=0;y<grid->_fdimensions[1];y++){
|
||||
for(int x=0;x<grid->_fdimensions[0];x++){
|
||||
std::vector<int> site({x,y,z,t});
|
||||
peekSite(unmunged,Umu,site);
|
||||
munge(unmunged,file_object,csum);
|
||||
// broadcast & insert
|
||||
fout.write((char *)&file_object,sizeof(file_object));
|
||||
}}}}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std::string file)
|
||||
{
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
int offset = readNerscHeader(file,Umu._grid,header);
|
||||
NerscField clone(header);
|
||||
|
||||
std::string format(header.floating_point);
|
||||
|
||||
@ -516,48 +311,106 @@ inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
uint32_t csum;
|
||||
// depending on datatype, set up munger;
|
||||
// munger is a function of <floating point, Real, data_type>
|
||||
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3F>
|
||||
(Umu,file,
|
||||
Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(),
|
||||
offset,format);
|
||||
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
readNerscObject<vLorentzColourMatrix, LorentzColourMatrix, LorentzColour2x3D>
|
||||
(Umu,file,
|
||||
Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),
|
||||
offset,format);
|
||||
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
|
||||
}
|
||||
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixF>
|
||||
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
readNerscObject<vLorentzColourMatrix,LorentzColourMatrix,LorentzColourMatrixD>
|
||||
// csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
|
||||
}
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
NerscStatistics<GaugeField>(Umu,clone);
|
||||
|
||||
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
|
||||
assert(csum == header.checksum );
|
||||
|
||||
std::cout<<GridLogMessage <<"Read NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline void writeNerscConfiguration(Lattice<vobj> &Umu,NerscField &header,std::string file)
|
||||
template<class vsimd>
|
||||
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
|
||||
{
|
||||
GridBase &grid = Umu._grid;
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
typedef iLorentzColourMatrix<vsimd> vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
// Following should become arguments
|
||||
NerscField header;
|
||||
header.sequence_number = 1;
|
||||
header.ensemble_id = "UKQCD";
|
||||
header.ensemble_label = "DWF";
|
||||
|
||||
typedef LorentzColourMatrixD fobj3D;
|
||||
typedef LorentzColour2x3D fobj2D;
|
||||
typedef LorentzColourMatrixF fobj3f;
|
||||
typedef LorentzColour2x3F fobj2f;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
NerscGrid(grid,header);
|
||||
NerscStatistics<GaugeField>(Umu,header);
|
||||
NerscMachineCharacteristics(header);
|
||||
|
||||
uint32_t csum;
|
||||
int offset;
|
||||
|
||||
NerscStatisics(Umu,header);
|
||||
if ( two_row ) {
|
||||
|
||||
int offset = writeNerscHeader(header,file);
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE");
|
||||
Nersc3x2unmunger<fobj2D,sobj> munge;
|
||||
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
|
||||
offset = writeHeader(header,file);
|
||||
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
|
||||
|
||||
writeNerscObject(Umu,NerscSimpleMunger<vobj,vobj>(),offset);
|
||||
}
|
||||
std::string file1 = file+"para";
|
||||
int offset1 = writeHeader(header,file1);
|
||||
int csum1=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file1,munge,offset,header.floating_point);
|
||||
|
||||
|
||||
std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl;
|
||||
std::cout << GridLogMessage <<std::hex<< " TESTING PARALLEL WRITE csums " << csum1 << " "<< csum << std::endl;
|
||||
std::cout << std::dec;
|
||||
|
||||
}
|
||||
assert(offset1==offset);
|
||||
assert(csum1==csum);
|
||||
|
||||
} else {
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE_3X3");
|
||||
NerscSimpleUnmunger<fobj3D,sobj> munge;
|
||||
BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
|
||||
offset = writeHeader(header,file);
|
||||
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
||||
#endif
|
||||
|
@ -14,7 +14,7 @@
|
||||
#ifndef SOURCE_PUGIXML_CPP
|
||||
#define SOURCE_PUGIXML_CPP
|
||||
|
||||
#include "pugixml.hpp"
|
||||
#include <pugixml/pugixml.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
@ -17,7 +17,7 @@
|
||||
#endif
|
||||
|
||||
// Include user configuration file (this can define various configuration macros)
|
||||
#include "pugiconfig.hpp"
|
||||
#include <pugixml/pugiconfig.hpp>
|
||||
|
||||
#ifndef HEADER_PUGIXML_HPP
|
||||
#define HEADER_PUGIXML_HPP
|
@ -62,7 +62,6 @@ namespace QCD {
|
||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
|
||||
|
||||
// Spin matrix
|
||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
||||
@ -154,7 +153,7 @@ namespace QCD {
|
||||
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
||||
|
||||
// singlets
|
||||
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
||||
|
||||
@ -162,7 +161,7 @@ namespace QCD {
|
||||
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
||||
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
||||
|
||||
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
||||
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
||||
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
||||
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
||||
|
||||
@ -251,6 +250,8 @@ namespace QCD {
|
||||
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
||||
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
||||
|
||||
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
|
||||
|
||||
// Uhgg... typing this hurt ;)
|
||||
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
||||
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
||||
|
@ -7,16 +7,15 @@ template<class GaugeField>
|
||||
class Action {
|
||||
|
||||
public:
|
||||
virtual void init (const GaugeField &U, GridParallelRNG& pRNG) = 0; //
|
||||
// Boundary conditions? // Heatbath?
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions
|
||||
virtual RealD S (const GaugeField &U) = 0; // evaluate the action
|
||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
|
||||
virtual void refresh(const GaugeField & ) {}; // Default to no-op for actions with no internal fields
|
||||
// Boundary conditions?
|
||||
// Heatbath?
|
||||
virtual ~Action() {};
|
||||
};
|
||||
|
||||
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
|
||||
/*
|
||||
template<class GaugeField, class FermionField>
|
||||
class PseudoFermionAction : public Action<GaugeField> {
|
||||
public:
|
||||
@ -32,5 +31,28 @@ class PseudoFermionAction : public Action<GaugeField> {
|
||||
};
|
||||
|
||||
};
|
||||
*/
|
||||
|
||||
template<class GaugeField> struct ActionLevel{
|
||||
public:
|
||||
|
||||
typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
|
||||
|
||||
int multiplier;
|
||||
|
||||
std::vector<ActPtr> actions;
|
||||
|
||||
ActionLevel(int mul = 1) : multiplier(mul) {
|
||||
assert (mul > 0);
|
||||
};
|
||||
|
||||
void push_back(ActPtr ptr){
|
||||
actions.push_back(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >;
|
||||
|
||||
|
||||
}}
|
||||
#endif
|
||||
|
@ -16,10 +16,6 @@
|
||||
#include <qcd/action/ActionBase.h>
|
||||
#include <qcd/action/ActionParams.h>
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Gauge Actions
|
||||
////////////////////////////////////////////
|
||||
#include <qcd/action/gauge/WilsonGaugeAction.h>
|
||||
|
||||
|
||||
////////////////////////////////////////////
|
||||
@ -31,6 +27,17 @@
|
||||
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
||||
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Gauge Actions
|
||||
////////////////////////////////////////////
|
||||
#include <qcd/action/gauge/WilsonGaugeAction.h>
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
typedef WilsonGaugeAction<LatticeGaugeField> WilsonGaugeActionR;
|
||||
typedef WilsonGaugeAction<LatticeGaugeFieldF> WilsonGaugeActionF;
|
||||
typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
|
||||
}}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Explicit explicit template instantiation is still required in the .cc files
|
||||
//
|
||||
@ -47,10 +54,10 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define FermOpTemplateInstantiate(A) \
|
||||
template class A<GparityWilsonImplF>; \
|
||||
template class A<GparityWilsonImplD>; \
|
||||
template class A<WilsonImplF>; \
|
||||
template class A<WilsonImplD>;
|
||||
template class A<WilsonImplD>;
|
||||
// template class A<GparityWilsonImplF>; \
|
||||
// template class A<GparityWilsonImplD>;
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Fermion operators / actions
|
||||
@ -72,8 +79,8 @@
|
||||
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
||||
|
||||
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
||||
#include <qcd/action/fermion/OverlapWilsonContFracTanhFermion.h>
|
||||
#include <qcd/action/fermion/OverlapWilsonContFracZolotarevFermion.h>
|
||||
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
||||
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
||||
|
||||
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||
|
@ -82,7 +82,8 @@ namespace QCD {
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
out = (4.0+mass)*in;
|
||||
typename FermionField::scalar_type scal(4.0+mass);
|
||||
out = scal*in;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
|
@ -28,18 +28,31 @@ namespace Grid {
|
||||
void DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
||||
#define HANDOPT
|
||||
#ifdef HANDOPT
|
||||
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out);
|
||||
#else
|
||||
|
||||
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out){
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||
}
|
||||
|
||||
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out){
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||
}
|
||||
#endif
|
||||
|
||||
WilsonKernels(const ImplParams &p= ImplParams()) : Base(p) {};
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHIMU \
|
||||
const vSpinColourVector & ref (in._odata[offset]); \
|
||||
const SiteSpinor & ref (in._odata[offset]); \
|
||||
Chimu_00=ref()(0)(0);\
|
||||
Chimu_01=ref()(0)(1);\
|
||||
Chimu_02=ref()(0)(2);\
|
||||
@ -18,7 +18,7 @@
|
||||
Chimu_32=ref()(3)(2);
|
||||
|
||||
#define LOAD_CHI\
|
||||
const vHalfSpinColourVector &ref(buf[offset]); \
|
||||
const SiteHalfSpinor &ref(buf[offset]); \
|
||||
Chi_00 = ref()(0)(0);\
|
||||
Chi_01 = ref()(0)(1);\
|
||||
Chi_02 = ref()(0)(2);\
|
||||
@ -56,13 +56,13 @@
|
||||
UChi_02+= U_20*Chi_02;\
|
||||
UChi_12+= U_20*Chi_12;
|
||||
|
||||
#define PERMUTE\
|
||||
permute(Chi_00,Chi_00,ptype);\
|
||||
permute(Chi_01,Chi_01,ptype);\
|
||||
permute(Chi_02,Chi_02,ptype);\
|
||||
permute(Chi_10,Chi_10,ptype);\
|
||||
permute(Chi_11,Chi_11,ptype);\
|
||||
permute(Chi_12,Chi_12,ptype);
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_00,Chi_00);\
|
||||
permute##dir(Chi_01,Chi_01);\
|
||||
permute##dir(Chi_02,Chi_02);\
|
||||
permute##dir(Chi_10,Chi_10);\
|
||||
permute##dir(Chi_11,Chi_11);\
|
||||
permute##dir(Chi_12,Chi_12);
|
||||
|
||||
// hspin(0)=fspin(0)+timesI(fspin(3));
|
||||
// hspin(1)=fspin(1)+timesI(fspin(2));
|
||||
@ -280,12 +280,16 @@
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
#if 0
|
||||
template<class Simd>
|
||||
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||
#ifdef HANDOPT
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// std::cout << "Hand op Dhop "<<std::endl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
REGISTER Simd result_00; // 12 regs on knc
|
||||
REGISTER Simd result_01;
|
||||
REGISTER Simd result_02;
|
||||
@ -339,20 +343,20 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
#define Chimu_32 UChi_12
|
||||
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int ss=sF;
|
||||
|
||||
// Xp
|
||||
offset = st._offsets [Xp][ss];
|
||||
local = st._is_local[Xp][ss];
|
||||
perm = st._permute[Xp][ss];
|
||||
ptype = st._permute_type[Xp];
|
||||
SE=st.GetEntry(ptype,Xp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
XP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -364,16 +368,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
XP_RECON;
|
||||
|
||||
// Yp
|
||||
offset = st._offsets [Yp][ss];
|
||||
local = st._is_local[Yp][ss];
|
||||
perm = st._permute[Yp][ss];
|
||||
ptype = st._permute_type[Yp];
|
||||
SE=st.GetEntry(ptype,Yp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
YP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -385,16 +389,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
|
||||
|
||||
// Zp
|
||||
offset = st._offsets [Zp][ss];
|
||||
local = st._is_local[Zp][ss];
|
||||
perm = st._permute[Zp][ss];
|
||||
ptype = st._permute_type[Zp];
|
||||
SE=st.GetEntry(ptype,Zp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
ZP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -405,16 +409,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
ZP_RECON_ACCUM;
|
||||
|
||||
// Tp
|
||||
offset = st._offsets [Tp][ss];
|
||||
local = st._is_local[Tp][ss];
|
||||
perm = st._permute[Tp][ss];
|
||||
ptype = st._permute_type[Tp];
|
||||
SE=st.GetEntry(ptype,Tp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
TP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -425,16 +429,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
TP_RECON_ACCUM;
|
||||
|
||||
// Xm
|
||||
offset = st._offsets [Xm][ss];
|
||||
local = st._is_local[Xm][ss];
|
||||
perm = st._permute[Xm][ss];
|
||||
ptype = st._permute_type[Xm];
|
||||
SE=st.GetEntry(ptype,Xm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
XM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -445,16 +449,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
XM_RECON_ACCUM;
|
||||
|
||||
// Ym
|
||||
offset = st._offsets [Ym][ss];
|
||||
local = st._is_local[Ym][ss];
|
||||
perm = st._permute[Ym][ss];
|
||||
ptype = st._permute_type[Ym];
|
||||
SE=st.GetEntry(ptype,Ym,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
YM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -465,16 +469,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
YM_RECON_ACCUM;
|
||||
|
||||
// Zm
|
||||
offset = st._offsets [Zm][ss];
|
||||
local = st._is_local[Zm][ss];
|
||||
perm = st._permute[Zm][ss];
|
||||
ptype = st._permute_type[Zm];
|
||||
SE=st.GetEntry(ptype,Zm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
ZM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -485,16 +489,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
ZM_RECON_ACCUM;
|
||||
|
||||
// Tm
|
||||
offset = st._offsets [Tm][ss];
|
||||
local = st._is_local[Tm][ss];
|
||||
perm = st._permute[Tm][ss];
|
||||
ptype = st._permute_type[Tm];
|
||||
SE=st.GetEntry(ptype,Tm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
TM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -505,7 +509,7 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
TM_RECON_ACCUM;
|
||||
|
||||
{
|
||||
vSpinColourVector & ref (out._odata[ss]);
|
||||
SiteSpinor & ref (out._odata[ss]);
|
||||
vstream(ref()(0)(0),result_00*(-0.5));
|
||||
vstream(ref()(0)(1),result_01*(-0.5));
|
||||
vstream(ref()(0)(2),result_02*(-0.5));
|
||||
@ -521,11 +525,14 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
||||
}
|
||||
}
|
||||
|
||||
template<class Simd>
|
||||
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
REGISTER Simd result_00; // 12 regs on knc
|
||||
REGISTER Simd result_01;
|
||||
REGISTER Simd result_02;
|
||||
@ -580,18 +587,19 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
// Xp
|
||||
offset = st._offsets [Xp][ss];
|
||||
local = st._is_local[Xp][ss];
|
||||
perm = st._permute[Xp][ss];
|
||||
ptype = st._permute_type[Xp];
|
||||
SE=st.GetEntry(ptype,Xp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
XM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -602,16 +610,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
XM_RECON;
|
||||
|
||||
// Yp
|
||||
offset = st._offsets [Yp][ss];
|
||||
local = st._is_local[Yp][ss];
|
||||
perm = st._permute[Yp][ss];
|
||||
ptype = st._permute_type[Yp];
|
||||
SE=st.GetEntry(ptype,Yp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
YM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -623,16 +631,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
|
||||
|
||||
// Zp
|
||||
offset = st._offsets [Zp][ss];
|
||||
local = st._is_local[Zp][ss];
|
||||
perm = st._permute[Zp][ss];
|
||||
ptype = st._permute_type[Zp];
|
||||
SE=st.GetEntry(ptype,Zp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
ZM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -643,16 +651,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
ZM_RECON_ACCUM;
|
||||
|
||||
// Tp
|
||||
offset = st._offsets [Tp][ss];
|
||||
local = st._is_local[Tp][ss];
|
||||
perm = st._permute[Tp][ss];
|
||||
ptype = st._permute_type[Tp];
|
||||
SE=st.GetEntry(ptype,Tp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
TM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -663,16 +671,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
TM_RECON_ACCUM;
|
||||
|
||||
// Xm
|
||||
offset = st._offsets [Xm][ss];
|
||||
local = st._is_local[Xm][ss];
|
||||
perm = st._permute[Xm][ss];
|
||||
ptype = st._permute_type[Xm];
|
||||
SE=st.GetEntry(ptype,Xm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
XP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -684,16 +692,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
|
||||
|
||||
// Ym
|
||||
offset = st._offsets [Ym][ss];
|
||||
local = st._is_local[Ym][ss];
|
||||
perm = st._permute[Ym][ss];
|
||||
ptype = st._permute_type[Ym];
|
||||
SE=st.GetEntry(ptype,Ym,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
YP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -704,16 +712,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
YP_RECON_ACCUM;
|
||||
|
||||
// Zm
|
||||
offset = st._offsets [Zm][ss];
|
||||
local = st._is_local[Zm][ss];
|
||||
perm = st._permute[Zm][ss];
|
||||
ptype = st._permute_type[Zm];
|
||||
|
||||
SE=st.GetEntry(ptype,Zm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
ZP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -724,16 +732,16 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
ZP_RECON_ACCUM;
|
||||
|
||||
// Tm
|
||||
offset = st._offsets [Tm][ss];
|
||||
local = st._is_local[Tm][ss];
|
||||
perm = st._permute[Tm][ss];
|
||||
ptype = st._permute_type[Tm];
|
||||
|
||||
SE=st.GetEntry(ptype,Tm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
TP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE;
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
@ -759,5 +767,6 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
||||
vstream(ref()(3)(2),result_32*(-0.5));
|
||||
}
|
||||
}
|
||||
FermOpTemplateInstantiate(WilsonKernels);
|
||||
#endif
|
||||
}}
|
||||
|
@ -7,35 +7,42 @@ namespace Grid{
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Wilson Gauge Action .. should I template the Nc etc..
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
template<class GaugeField, class MatrixField>
|
||||
class WilsonGaugeAction : public Action<GaugeField> {
|
||||
template<class GaugeField>
|
||||
class WilsonGaugeAction : public Action<GaugeField> {
|
||||
public:
|
||||
|
||||
typedef LorentzScalar<GaugeField> GaugeLinkField;
|
||||
|
||||
private:
|
||||
RealD beta;
|
||||
public:
|
||||
WilsonGaugeAction(RealD b):beta(b){};
|
||||
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {};
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
|
||||
|
||||
virtual RealD S(const GaugeField &U) {
|
||||
RealD plaq = WilsonLoops<MatrixField,GaugeField>::avgPlaquette(U);
|
||||
RealD plaq = WilsonLoops<GaugeField>::avgPlaquette(U);
|
||||
std::cout<<GridLogMessage << "Plaq : "<<plaq << "\n";
|
||||
RealD vol = U._grid->gSites();
|
||||
RealD action=beta*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5;
|
||||
std::cout << GridLogMessage << "WilsonGauge action "<<action<<std::endl;
|
||||
return action;
|
||||
};
|
||||
|
||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
|
||||
//not optimal implementation FIXME
|
||||
//extend Ta to include Lorentz indexes
|
||||
RealD factor = 0.5*beta/RealD(Nc);
|
||||
MatrixField dSdU_mu(U._grid);
|
||||
MatrixField Umu(U._grid);
|
||||
GaugeLinkField Umu(U._grid);
|
||||
GaugeLinkField dSdU_mu(U._grid);
|
||||
for (int mu=0; mu < Nd; mu++){
|
||||
|
||||
Umu = PeekIndex<LorentzIndex>(U,mu);
|
||||
|
||||
// Staple in direction mu
|
||||
WilsonLoops<MatrixField,GaugeField>::Staple(dSdU_mu,U,mu);
|
||||
WilsonLoops<GaugeField>::Staple(dSdU_mu,U,mu);
|
||||
dSdU_mu = Ta(Umu*adj(dSdU_mu))*factor;
|
||||
pokeLorentz(dSdU, dSdU_mu, mu);
|
||||
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
@ -61,7 +61,7 @@ namespace Grid{
|
||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||
};
|
||||
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
|
||||
// = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}
|
||||
|
@ -61,7 +61,7 @@ namespace Grid{
|
||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||
};
|
||||
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
|
||||
//
|
||||
|
@ -57,7 +57,7 @@ namespace Grid{
|
||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||
};
|
||||
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// P(phi) = e^{- phi^dag (MdagM)^-1/2 phi}
|
||||
// = e^{- phi^dag (MdagM)^-1/4 (MdagM)^-1/4 phi}
|
||||
|
@ -55,7 +55,7 @@ namespace Grid{
|
||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||
};
|
||||
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
|
||||
//
|
||||
|
@ -35,7 +35,7 @@ namespace Grid{
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// P(phi) = e^{- phi^dag (MdagM)^-1 phi}
|
||||
// Phi = Mdag eta
|
||||
|
@ -44,7 +44,7 @@ namespace Grid{
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1 phi}
|
||||
// Phi = McpDag eta
|
||||
|
@ -40,7 +40,7 @@ namespace Grid{
|
||||
conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
|
||||
};
|
||||
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
|
||||
//
|
||||
|
@ -28,7 +28,7 @@ namespace Grid{
|
||||
OperatorFunction<FermionField> & AS
|
||||
) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), Phi(_NumOp.FermionGrid()) {};
|
||||
|
||||
virtual void init(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// P(phi) = e^{- phi^dag V (MdagM)^-1 Vdag phi}
|
||||
//
|
||||
|
@ -8,8 +8,8 @@ namespace Grid{
|
||||
|
||||
////////////////////////////// Default values
|
||||
Nsweeps = 200;
|
||||
TotalSweeps = 220;
|
||||
ThermalizationSteps = 20;
|
||||
TotalSweeps = 240;
|
||||
ThermalizationSteps = 40;
|
||||
StartingConfig = 0;
|
||||
SaveInterval = 1;
|
||||
Filename_prefix = "Conf_";
|
||||
|
@ -15,6 +15,7 @@
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
|
||||
struct HMCparameters{
|
||||
Integer Nsweeps; /* @brief Number of sweeps in this run */
|
||||
Integer TotalSweeps; /* @brief If provided, the total number of sweeps */
|
||||
@ -26,14 +27,17 @@ namespace Grid{
|
||||
HMCparameters();
|
||||
};
|
||||
|
||||
template <class Algorithm>
|
||||
class HybridMonteCarlo{
|
||||
// template <class GaugeField, class Integrator, class Smearer, class Boundary>
|
||||
template <class GaugeField, class IntegratorType>
|
||||
class HybridMonteCarlo {
|
||||
private:
|
||||
|
||||
const HMCparameters Params;
|
||||
|
||||
GridSerialRNG &sRNG; // Fixme: need a RNG management strategy.
|
||||
GridParallelRNG &pRNG; // Fixme: need a RNG management strategy.
|
||||
|
||||
GridSerialRNG sRNG; // Fixme: need a RNG management strategy.
|
||||
|
||||
Integrator<Algorithm>& MD;
|
||||
IntegratorType &TheIntegrator;
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// Metropolis step
|
||||
@ -63,16 +67,20 @@ namespace Grid{
|
||||
/////////////////////////////////////////////////////////
|
||||
// Evolution
|
||||
/////////////////////////////////////////////////////////
|
||||
RealD evolve_step(LatticeGaugeField& U){
|
||||
MD.init(U); // set U and initialize P and phi's
|
||||
RealD evolve_step(GaugeField& U){
|
||||
|
||||
TheIntegrator.refresh(U,pRNG); // set U and initialize P and phi's
|
||||
|
||||
RealD H0 = TheIntegrator.S(U); // initial state action
|
||||
|
||||
RealD H0 = MD.S(U); // initial state action
|
||||
std::cout<<GridLogMessage<<"Total H before = "<< H0 << "\n";
|
||||
|
||||
MD.integrate(U);
|
||||
TheIntegrator.integrate(U);
|
||||
|
||||
RealD H1 = MD.S(U); // updated state action
|
||||
RealD H1 = TheIntegrator.S(U); // updated state action
|
||||
|
||||
std::cout<<GridLogMessage<<"Total H after = "<< H1 << "\n";
|
||||
|
||||
return (H1-H0);
|
||||
}
|
||||
|
||||
@ -81,16 +89,18 @@ namespace Grid{
|
||||
/////////////////////////////////////////
|
||||
// Constructor
|
||||
/////////////////////////////////////////
|
||||
HybridMonteCarlo(HMCparameters Pms, Integrator<Algorithm>& MolDyn): Params(Pms),MD(MolDyn) {
|
||||
|
||||
//FIXME... initialize RNGs also with seed ; RNG management strategy
|
||||
sRNG.SeedRandomDevice();
|
||||
|
||||
HybridMonteCarlo(HMCparameters Pms, IntegratorType &_Int, GridSerialRNG &_sRNG, GridParallelRNG &_pRNG ) :
|
||||
Params(Pms),
|
||||
TheIntegrator(_Int),
|
||||
sRNG(_sRNG),
|
||||
pRNG(_pRNG)
|
||||
{
|
||||
}
|
||||
~HybridMonteCarlo(){};
|
||||
|
||||
|
||||
void evolve(LatticeGaugeField& Uin){
|
||||
void evolve(GaugeField& Uin){
|
||||
|
||||
Real DeltaH;
|
||||
|
||||
// Thermalizations
|
||||
@ -102,9 +112,8 @@ namespace Grid{
|
||||
}
|
||||
|
||||
// Actual updates (evolve a copy Ucopy then copy back eventually)
|
||||
LatticeGaugeField Ucopy(Uin._grid);
|
||||
for(int iter=Params.StartingConfig;
|
||||
iter < Params.Nsweeps+Params.StartingConfig; ++iter){
|
||||
GaugeField Ucopy(Uin._grid);
|
||||
for(int iter=Params.StartingConfig; iter < Params.Nsweeps+Params.StartingConfig; ++iter){
|
||||
std::cout<<GridLogMessage << "-- # Sweep = "<< iter << "\n";
|
||||
|
||||
Ucopy = Uin;
|
||||
|
@ -1,28 +0,0 @@
|
||||
/*!
|
||||
@file Integrator_base.cc
|
||||
@brief utilities for MD including funcs to generate initial HMC momentum
|
||||
*/
|
||||
|
||||
#include <Grid.h>
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
void MDutils::generate_momenta(LatticeLorentzColourMatrix& P,GridParallelRNG& pRNG){
|
||||
// for future support of different groups
|
||||
MDutils::generate_momenta_su3(P, pRNG);
|
||||
}
|
||||
|
||||
void MDutils::generate_momenta_su3(LatticeLorentzColourMatrix& P,GridParallelRNG& pRNG){
|
||||
LatticeColourMatrix Pmu(P._grid);
|
||||
Pmu = zero;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
SU3::GaussianLieAlgebraMatrix(pRNG, Pmu);
|
||||
PokeIndex<LorentzIndex>(P, Pmu, mu);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
@ -10,143 +10,193 @@
|
||||
#ifndef INTEGRATOR_INCLUDED
|
||||
#define INTEGRATOR_INCLUDED
|
||||
|
||||
class Observer;
|
||||
//class Observer;
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
typedef Action<LatticeGaugeField>* ActPtr; // now force the same colours as the rest of the code
|
||||
struct ActionLevel{
|
||||
int multiplier;
|
||||
public:
|
||||
std::vector<ActPtr> actions;
|
||||
explicit ActionLevel(int mul = 1):multiplier(mul){assert (mul > 0);};
|
||||
void push_back(ActPtr ptr){
|
||||
actions.push_back(ptr);
|
||||
}
|
||||
};
|
||||
typedef std::vector<ActionLevel> ActionSet;
|
||||
typedef std::vector<Observer*> ObserverList;
|
||||
|
||||
struct IntegratorParameters{
|
||||
|
||||
int Nexp;
|
||||
int MDsteps; // number of outer steps
|
||||
RealD trajL; // trajectory length
|
||||
RealD stepsize;
|
||||
|
||||
IntegratorParameters(int Nexp_,
|
||||
int MDsteps_,
|
||||
RealD trajL_):
|
||||
Nexp(Nexp_),MDsteps(MDsteps_),trajL(trajL_),stepsize(trajL/MDsteps){};
|
||||
IntegratorParameters(int MDsteps_,
|
||||
RealD trajL_=1.0,
|
||||
int Nexp_=12):
|
||||
Nexp(Nexp_),
|
||||
MDsteps(MDsteps_),
|
||||
trajL(trajL_),
|
||||
stepsize(trajL/MDsteps)
|
||||
{
|
||||
// empty body constructor
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
namespace MDutils{
|
||||
void generate_momenta(LatticeGaugeField&,GridParallelRNG&);
|
||||
void generate_momenta_su3(LatticeGaugeField&,GridParallelRNG&);
|
||||
}
|
||||
|
||||
/*! @brief Class for Molecular Dynamics management */
|
||||
template< class IntegratorAlgorithm >
|
||||
class Integrator{
|
||||
private:
|
||||
template<class GaugeField>
|
||||
class Integrator {
|
||||
|
||||
protected:
|
||||
|
||||
typedef IntegratorParameters ParameterType;
|
||||
|
||||
IntegratorParameters Params;
|
||||
const ActionSet as;
|
||||
std::unique_ptr<LatticeGaugeField> P;
|
||||
GridParallelRNG pRNG;
|
||||
//ObserverList observers; // not yet
|
||||
|
||||
IntegratorAlgorithm TheIntegrator;
|
||||
|
||||
void register_observers();
|
||||
void notify_observers();
|
||||
const ActionSet<GaugeField> as;
|
||||
|
||||
void update_P(LatticeGaugeField&U, int level,double ep){
|
||||
for(int a=0; a<as[level].actions.size(); ++a){
|
||||
LatticeGaugeField force(U._grid);
|
||||
as[level].actions.at(a)->deriv(U,force);
|
||||
*P -= force*ep;
|
||||
int levels; //
|
||||
double t_U; // Track time passing on each level and for U and for P
|
||||
std::vector<double> t_P; //
|
||||
|
||||
GaugeField P;
|
||||
|
||||
// Should match any legal (SU(n)) gauge field
|
||||
// Need to use this template to match Ncol to pass to SU<N> class
|
||||
template<int Ncol,class vec> void generate_momenta(Lattice< iVector< iScalar< iMatrix<vec,Ncol> >, Nd> > & P,GridParallelRNG& pRNG){
|
||||
typedef Lattice< iScalar< iScalar< iMatrix<vec,Ncol> > > > GaugeLinkField;
|
||||
GaugeLinkField Pmu(P._grid);
|
||||
Pmu = zero;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
SU<Ncol>::GaussianLieAlgebraMatrix(pRNG, Pmu);
|
||||
PokeIndex<LorentzIndex>(P, Pmu, mu);
|
||||
}
|
||||
}
|
||||
|
||||
void update_U(LatticeGaugeField&U, double ep){
|
||||
|
||||
//ObserverList observers; // not yet
|
||||
// typedef std::vector<Observer*> ObserverList;
|
||||
// void register_observers();
|
||||
// void notify_observers();
|
||||
|
||||
void update_P(GaugeField&U, int level,double ep){
|
||||
t_P[level]+=ep;
|
||||
update_P(P,U,level,ep);
|
||||
|
||||
std::cout<<GridLogMessage;
|
||||
for(int l=0; l<level;++l) std::cout<<" ";
|
||||
std::cout<<"["<<level<<"] P " << " dt "<< ep <<" : t_P "<< t_P[level] <<std::endl;
|
||||
}
|
||||
|
||||
void update_P(GaugeField &Mom,GaugeField&U, int level,double ep){
|
||||
for(int a=0; a<as[level].actions.size(); ++a){
|
||||
GaugeField force(U._grid);
|
||||
as[level].actions.at(a)->deriv(U,force);
|
||||
Mom = Mom - force*ep;
|
||||
}
|
||||
}
|
||||
|
||||
void update_U(GaugeField&U, double ep){
|
||||
update_U(P,U,ep);
|
||||
|
||||
t_U+=ep;
|
||||
int fl = levels-1;
|
||||
std::cout<<GridLogMessage<<" ";
|
||||
for(int l=0; l<fl;++l) std::cout<<" ";
|
||||
std::cout<<"["<<fl<<"] U " << " dt "<< ep <<" : t_U "<< t_U <<std::endl;
|
||||
|
||||
}
|
||||
void update_U(GaugeField &Mom, GaugeField&U, double ep){
|
||||
//rewrite exponential to deal automatically with the lorentz index?
|
||||
LatticeColourMatrix Umu(U._grid);
|
||||
LatticeColourMatrix Pmu(U._grid);
|
||||
// GaugeLinkField Umu(U._grid);
|
||||
// GaugeLinkField Pmu(U._grid);
|
||||
for (int mu = 0; mu < Nd; mu++){
|
||||
Umu=PeekIndex<LorentzIndex>(U, mu);
|
||||
Pmu=PeekIndex<LorentzIndex>(*P, mu);
|
||||
auto Umu=PeekIndex<LorentzIndex>(U, mu);
|
||||
auto Pmu=PeekIndex<LorentzIndex>(Mom, mu);
|
||||
Umu = expMat(Pmu, ep, Params.Nexp)*Umu;
|
||||
ProjectOnGroup(Umu);
|
||||
PokeIndex<LorentzIndex>(U, Umu, mu);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
friend void IntegratorAlgorithm::step (LatticeGaugeField& U,
|
||||
int level, std::vector<int>& clock,
|
||||
Integrator<IntegratorAlgorithm>* Integ);
|
||||
virtual void step (GaugeField& U,int level, int first,int last)=0;
|
||||
|
||||
public:
|
||||
Integrator(GridBase* grid, IntegratorParameters Par,
|
||||
ActionSet& Aset):
|
||||
Params(Par),as(Aset),P(new LatticeGaugeField(grid)),pRNG(grid){
|
||||
pRNG.SeedRandomDevice();
|
||||
|
||||
Integrator(GridBase* grid,
|
||||
IntegratorParameters Par,
|
||||
ActionSet<GaugeField> & Aset):
|
||||
Params(Par),
|
||||
as(Aset),
|
||||
P(grid),
|
||||
levels(Aset.size())
|
||||
{
|
||||
t_P.resize(levels,0.0);
|
||||
t_U=0.0;
|
||||
};
|
||||
|
||||
~Integrator(){}
|
||||
virtual ~Integrator(){}
|
||||
|
||||
//Initialization of momenta and actions
|
||||
void init(LatticeGaugeField& U){
|
||||
std::cout<<GridLogMessage<< "Integrator init\n";
|
||||
MDutils::generate_momenta(*P,pRNG);
|
||||
void refresh(GaugeField& U,GridParallelRNG &pRNG){
|
||||
std::cout<<GridLogMessage<< "Integrator refresh\n";
|
||||
generate_momenta(P,pRNG);
|
||||
for(int level=0; level< as.size(); ++level){
|
||||
for(int actionID=0; actionID<as[level].actions.size(); ++actionID){
|
||||
as[level].actions.at(actionID)->init(U, pRNG);
|
||||
as[level].actions.at(actionID)->refresh(U, pRNG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate action
|
||||
RealD S(LatticeGaugeField& U){
|
||||
LatticeComplex Hloc(U._grid);
|
||||
Hloc = zero;
|
||||
RealD S(GaugeField& U){
|
||||
|
||||
LatticeComplex Hloc(U._grid); Hloc = zero;
|
||||
// Momenta
|
||||
for (int mu=0; mu <Nd; mu++){
|
||||
LatticeColourMatrix Pmu = peekLorentz(*P, mu);
|
||||
auto Pmu = PeekIndex<LorentzIndex>(P, mu);
|
||||
Hloc -= trace(Pmu*Pmu);
|
||||
}
|
||||
Complex Hsum = sum(Hloc);
|
||||
|
||||
RealD H = Hsum.real();
|
||||
|
||||
RealD Hterm;
|
||||
std::cout<<GridLogMessage << "Momentum action H_p = "<< H << "\n";
|
||||
|
||||
// Actions
|
||||
for(int level=0; level<as.size(); ++level)
|
||||
for(int actionID=0; actionID<as[level].actions.size(); ++actionID)
|
||||
H += as[level].actions.at(actionID)->S(U);
|
||||
|
||||
for(int level=0; level<as.size(); ++level){
|
||||
for(int actionID=0; actionID<as[level].actions.size(); ++actionID){
|
||||
Hterm = as[level].actions.at(actionID)->S(U);
|
||||
std::cout<<GridLogMessage << "Level "<<level<<" term "<<actionID<<" H = "<<Hterm<<std::endl;
|
||||
H += Hterm;
|
||||
}
|
||||
}
|
||||
std::cout<<GridLogMessage << "Total action H = "<< H << "\n";
|
||||
|
||||
return H;
|
||||
}
|
||||
|
||||
void integrate(LatticeGaugeField& U){
|
||||
std::vector<int> clock;
|
||||
clock.resize(as.size(),0);
|
||||
for(int step=0; step< Params.MDsteps; ++step) // MD step
|
||||
TheIntegrator.step(U,0,clock, (this));
|
||||
void integrate(GaugeField& U){
|
||||
|
||||
// reset the clocks
|
||||
t_U=0;
|
||||
for(int level=0; level<as.size(); ++level){
|
||||
t_P[level]=0;
|
||||
}
|
||||
|
||||
for(int step=0; step< Params.MDsteps; ++step){ // MD step
|
||||
int first_step = (step==0);
|
||||
int last_step = (step==Params.MDsteps-1);
|
||||
this->step(U,0,first_step,last_step);
|
||||
}
|
||||
|
||||
// Check the clocks all match on all levels
|
||||
for(int level=0; level<as.size(); ++level){
|
||||
assert(fabs(t_U - t_P[level])<1.0e-6); // must be the same
|
||||
std::cout<<GridLogMessage<<" times["<<level<<"]= "<<t_P[level]<< " " << t_U <<std::endl;
|
||||
}
|
||||
|
||||
// and that we indeed got to the end of the trajectory
|
||||
assert(fabs(t_U-Params.trajL) < 1.0e-6);
|
||||
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
#endif//INTEGRATOR_INCLUDED
|
||||
|
@ -9,138 +9,236 @@
|
||||
#ifndef INTEGRATOR_ALG_INCLUDED
|
||||
#define INTEGRATOR_ALG_INCLUDED
|
||||
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
/* PAB:
|
||||
*
|
||||
* Recursive leapfrog; explanation of nested stepping
|
||||
*
|
||||
* Nested 1:4; units in dt for top level integrator
|
||||
*
|
||||
* CHROMA IroIro
|
||||
* 0 1 0
|
||||
* P 1/2 P 1/2
|
||||
* P 1/16 P1/16
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 P1/8
|
||||
* P 1 P 1
|
||||
* P 1/16 * skipped --- avoids revaluating force
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 P1/8
|
||||
* P 1 P 1
|
||||
* P 1/16 * skipped
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 * skipped
|
||||
* P 1 P 1
|
||||
* P 1/16 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 P1/16
|
||||
* P 1/2 P 1/2
|
||||
*/
|
||||
|
||||
class MinimumNorm2{
|
||||
const double lambda = 0.1931833275037836;
|
||||
template<class GaugeField> class LeapFrog : public Integrator<GaugeField> {
|
||||
public:
|
||||
void step (LatticeLorentzColourMatrix& U,
|
||||
int level, std::vector<int>& clock,
|
||||
Integrator<MinimumNorm2>* Integ){
|
||||
|
||||
typedef LeapFrog<GaugeField> Algorithm;
|
||||
|
||||
LeapFrog(GridBase* grid,
|
||||
IntegratorParameters Par,
|
||||
ActionSet<GaugeField> & Aset): Integrator<GaugeField>(grid,Par,Aset) {};
|
||||
|
||||
|
||||
void step (GaugeField& U, int level,int _first, int _last){
|
||||
|
||||
int fl = this->as.size() -1;
|
||||
// level : current level
|
||||
// fl : final level
|
||||
// eps : current step size
|
||||
|
||||
int fl = Integ->as.size() -1;
|
||||
double eps = Integ->Params.stepsize;
|
||||
|
||||
for(int l=0; l<=level; ++l) eps/= 2.0*Integ->as[l].multiplier;
|
||||
|
||||
int fin = Integ->as[0].multiplier;
|
||||
for(int l=1; l<=level; ++l) fin*= 2.0*Integ->as[l].multiplier;
|
||||
fin = 3*Integ->Params.MDsteps*fin -1;
|
||||
|
||||
for(int e=0; e<Integ->as[level].multiplier; ++e){
|
||||
if(clock[level] == 0){ // initial half step
|
||||
Integ->update_P(U,level,lambda*eps);
|
||||
++clock[level];
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"P "<< clock[level] <<std::endl;
|
||||
}
|
||||
|
||||
if(level == fl){ // lowest level
|
||||
Integ->update_U(U,0.5*eps);
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"U "<< (clock[level]+1) <<std::endl;
|
||||
}else{ // recursive function call
|
||||
step(U,level+1,clock, Integ);
|
||||
}
|
||||
|
||||
Integ->update_P(U,level,(1.0-2.0*lambda)*eps);
|
||||
++clock[level];
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"P "<< (clock[level]) <<std::endl;
|
||||
|
||||
if(level == fl){ // lowest level
|
||||
Integ->update_U(U,0.5*eps);
|
||||
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"U "<< (clock[level]+1) <<std::endl;
|
||||
}else{ // recursive function call
|
||||
step(U,level+1,clock, Integ);
|
||||
}
|
||||
|
||||
|
||||
if(clock[level] == fin){ // final half step
|
||||
Integ->update_P(U,level,lambda*eps);
|
||||
|
||||
++clock[level];
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"P "<< clock[level] <<std::endl;
|
||||
}else{ // bulk step
|
||||
Integ->update_P(U,level,lambda*2.0*eps);
|
||||
|
||||
clock[level]+=2;
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"P "<< clock[level] <<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class LeapFrog{
|
||||
public:
|
||||
void step (LatticeLorentzColourMatrix& U,
|
||||
int level, std::vector<int>& clock,
|
||||
Integrator<LeapFrog>* Integ){
|
||||
|
||||
// level : current level
|
||||
// fl : final level
|
||||
// eps : current step size
|
||||
|
||||
int fl = Integ->as.size() -1;
|
||||
double eps = Integ->Params.stepsize;
|
||||
|
||||
// Get current level step size
|
||||
for(int l=0; l<=level; ++l) eps/= Integ->as[l].multiplier;
|
||||
RealD eps = this->Params.stepsize;
|
||||
for(int l=0; l<=level; ++l) eps/= this->as[l].multiplier;
|
||||
|
||||
int fin = 1;
|
||||
for(int l=0; l<=level; ++l) fin*= Integ->as[l].multiplier;
|
||||
fin = 2*Integ->Params.MDsteps*fin - 1;
|
||||
|
||||
for(int e=0; e<Integ->as[level].multiplier; ++e){
|
||||
|
||||
if(clock[level] == 0){ // initial half step
|
||||
Integ->update_P(U, level,eps/2.0);
|
||||
++clock[level];
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"P "<< 0.5*clock[level] <<std::endl;
|
||||
int multiplier = this->as[level].multiplier;
|
||||
for(int e=0; e<multiplier; ++e){
|
||||
|
||||
int first_step = _first && (e==0);
|
||||
int last_step = _last && (e==multiplier-1);
|
||||
|
||||
if(first_step){ // initial half step
|
||||
this->update_P(U, level,eps/2.0);
|
||||
}
|
||||
|
||||
if(level == fl){ // lowest level
|
||||
Integ->update_U(U, eps);
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"U "<< 0.5*(clock[level]+1) <<std::endl;
|
||||
this->update_U(U, eps);
|
||||
}else{ // recursive function call
|
||||
step(U, level+1,clock, Integ);
|
||||
this->step(U, level+1,first_step,last_step);
|
||||
}
|
||||
|
||||
if(clock[level] == fin){ // final half step
|
||||
Integ->update_P(U, level,eps/2.0);
|
||||
++clock[level];
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"P "<< 0.5*clock[level] <<std::endl;
|
||||
}else{ // bulk step
|
||||
Integ->update_P(U, level,eps);
|
||||
clock[level]+=2;
|
||||
for(int l=0; l<level;++l) std::cout<<GridLogMessage<<" ";
|
||||
std::cout<<GridLogMessage<<"P "<< 0.5*clock[level] <<std::endl;
|
||||
}
|
||||
int mm = last_step ? 1 : 2;
|
||||
this->update_P(U, level,mm*eps/2.0);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template<class GaugeField> class MinimumNorm2 : public Integrator<GaugeField> {
|
||||
private:
|
||||
const RealD lambda = 0.1931833275037836;
|
||||
|
||||
public:
|
||||
|
||||
MinimumNorm2(GridBase* grid,
|
||||
IntegratorParameters Par,
|
||||
ActionSet<GaugeField> & Aset): Integrator<GaugeField>(grid,Par,Aset) {};
|
||||
|
||||
void step (GaugeField& U, int level, int _first,int _last){
|
||||
|
||||
// level : current level
|
||||
// fl : final level
|
||||
// eps : current step size
|
||||
|
||||
int fl = this->as.size() -1;
|
||||
|
||||
RealD eps = this->Params.stepsize*2.0;
|
||||
for(int l=0; l<=level; ++l) eps/= 2.0*this->as[l].multiplier;
|
||||
|
||||
// Nesting: 2xupdate_U of size eps/2
|
||||
// Next level is eps/2/multiplier
|
||||
|
||||
int multiplier = this->as[level].multiplier;
|
||||
for(int e=0; e<multiplier; ++e){ // steps per step
|
||||
|
||||
int first_step = _first && (e==0);
|
||||
int last_step = _last && (e==multiplier-1);
|
||||
|
||||
if(first_step){ // initial half step
|
||||
this->update_P(U,level,lambda*eps);
|
||||
}
|
||||
|
||||
if(level == fl){ // lowest level
|
||||
this->update_U(U,0.5*eps);
|
||||
}else{ // recursive function call
|
||||
this->step(U,level+1,first_step,0);
|
||||
}
|
||||
|
||||
this->update_P(U,level,(1.0-2.0*lambda)*eps);
|
||||
|
||||
if(level == fl){ // lowest level
|
||||
this->update_U(U,0.5*eps);
|
||||
}else{ // recursive function call
|
||||
this->step(U,level+1,0,last_step);
|
||||
}
|
||||
|
||||
int mm = (last_step) ? 1 : 2;
|
||||
this->update_P(U,level,lambda*eps*mm);
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class GaugeField> class ForceGradient : public Integrator<GaugeField> {
|
||||
private:
|
||||
const RealD lambda = 1.0/6.0;;
|
||||
const RealD chi = 1.0/72.0;
|
||||
const RealD xi = 0.0;
|
||||
const RealD theta = 0.0;
|
||||
public:
|
||||
|
||||
// Looks like dH scales as dt^4. tested wilson/wilson 2 level.
|
||||
ForceGradient(GridBase* grid,
|
||||
IntegratorParameters Par,
|
||||
ActionSet<GaugeField> & Aset): Integrator<GaugeField>(grid,Par,Aset) {};
|
||||
|
||||
|
||||
void FG_update_P(GaugeField&U, int level,double fg_dt,double ep){
|
||||
GaugeField Ufg(U._grid);
|
||||
GaugeField Pfg(U._grid);
|
||||
Ufg = U;
|
||||
Pfg = zero;
|
||||
std::cout << GridLogMessage << "FG update "<<fg_dt<<" "<<ep<<std::endl;
|
||||
// prepare_fg; no prediction/result cache for now
|
||||
// could relax CG stopping conditions for the
|
||||
// derivatives in the small step since the force gets multiplied by
|
||||
// a tiny dt^2 term relative to main force.
|
||||
//
|
||||
// Presently 4 force evals, and should have 3, so 1.33x too expensive.
|
||||
// could reduce this with sloppy CG to perhaps 1.15x too expensive
|
||||
// even without prediction.
|
||||
this->update_P(Pfg,Ufg,level,1.0);
|
||||
this->update_U(Pfg,Ufg,fg_dt);
|
||||
this->update_P(Ufg,level,ep);
|
||||
}
|
||||
|
||||
void step (GaugeField& U, int level, int _first,int _last){
|
||||
|
||||
RealD eps = this->Params.stepsize*2.0;
|
||||
for(int l=0; l<=level; ++l) eps/= 2.0*this->as[l].multiplier;
|
||||
|
||||
RealD Chi = chi*eps*eps*eps;
|
||||
|
||||
int fl = this->as.size() -1;
|
||||
|
||||
int multiplier = this->as[level].multiplier;
|
||||
|
||||
for(int e=0; e<multiplier; ++e){ // steps per step
|
||||
|
||||
|
||||
int first_step = _first && (e==0);
|
||||
int last_step = _last && (e==multiplier-1);
|
||||
|
||||
if(first_step){ // initial half step
|
||||
this->update_P(U,level,lambda*eps);
|
||||
}
|
||||
|
||||
if(level == fl){ // lowest level
|
||||
this->update_U(U,0.5*eps);
|
||||
}else{ // recursive function call
|
||||
this->step(U,level+1,first_step,0);
|
||||
}
|
||||
|
||||
this->FG_update_P(U,level,2*Chi/((1.0-2.0*lambda)*eps),(1.0-2.0*lambda)*eps);
|
||||
|
||||
if(level == fl){ // lowest level
|
||||
this->update_U(U,0.5*eps);
|
||||
}else{ // recursive function call
|
||||
this->step(U,level+1,0,last_step);
|
||||
}
|
||||
|
||||
int mm = (last_step) ? 1 : 2;
|
||||
this->update_P(U,level,lambda*eps*mm);
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif//INTEGRATOR_INCLUDED
|
||||
|
@ -524,16 +524,22 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g
|
||||
// reunitarise??
|
||||
static void LieRandomize(GridParallelRNG &pRNG,LatticeMatrix &out,double scale=1.0){
|
||||
GridBase *grid = out._grid;
|
||||
|
||||
LatticeComplex ca (grid);
|
||||
LatticeMatrix lie(grid);
|
||||
LatticeMatrix la (grid);
|
||||
Complex ci(0.0,scale);
|
||||
Complex cone(1.0,0.0);
|
||||
Matrix ta;
|
||||
|
||||
lie=zero;
|
||||
for(int a=0;a<generators();a++){
|
||||
|
||||
random(pRNG,ca); ca=real(ca)-0.5;
|
||||
random(pRNG,ca);
|
||||
|
||||
ca = (ca+conjugate(ca))*0.5;
|
||||
ca = ca - 0.5;
|
||||
|
||||
generator(a,ta);
|
||||
|
||||
la=ci*ca*ta;
|
||||
|
@ -4,9 +4,12 @@ namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// Common wilson loop observables
|
||||
template<class GaugeMat,class GaugeLorentz>
|
||||
template<class GaugeLorentz>
|
||||
class WilsonLoops {
|
||||
public:
|
||||
|
||||
typedef LorentzScalar<GaugeLorentz> GaugeMat;
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// directed plaquette oriented in mu,nu plane
|
||||
//////////////////////////////////////////////////
|
||||
@ -68,6 +71,22 @@ public:
|
||||
|
||||
return sumplaq/vol/faces/Nc; // Nd , Nc dependent... FIXME
|
||||
}
|
||||
static RealD linkTrace(const GaugeLorentz &Umu){
|
||||
std::vector<GaugeMat> U(4,Umu._grid);
|
||||
|
||||
LatticeComplex Tr(Umu._grid); Tr=zero;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
Tr = Tr+trace(U[mu]);
|
||||
}
|
||||
|
||||
TComplex Tp = sum(Tr);
|
||||
Complex p = TensorRemove(Tp);
|
||||
|
||||
double vol = Umu._grid->gSites();
|
||||
|
||||
return p.real()/vol/4.0/3.0;
|
||||
};
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site
|
||||
//////////////////////////////////////////////////
|
||||
@ -125,10 +144,10 @@ void siteRectangle(GaugeMat &plaq,const std::vector<GaugeMat> &U, const int mu,
|
||||
};
|
||||
|
||||
|
||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> ColourWilsonLoops;
|
||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> U1WilsonLoops;
|
||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> SU2WilsonLoops;
|
||||
typedef WilsonLoops<LatticeColourMatrix,LatticeGaugeField> SU3WilsonLoops;
|
||||
typedef WilsonLoops<LatticeGaugeField> ColourWilsonLoops;
|
||||
typedef WilsonLoops<LatticeGaugeField> U1WilsonLoops;
|
||||
typedef WilsonLoops<LatticeGaugeField> SU2WilsonLoops;
|
||||
typedef WilsonLoops<LatticeGaugeField> SU3WilsonLoops;
|
||||
|
||||
}}
|
||||
|
||||
|
49
lib/serialisation/BaseIO.h
Normal file
49
lib/serialisation/BaseIO.h
Normal file
@ -0,0 +1,49 @@
|
||||
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H
|
||||
#define GRID_SERIALISATION_ABSTRACT_READER_H
|
||||
|
||||
namespace Grid {
|
||||
class Writer {
|
||||
public:
|
||||
|
||||
virtual ~Writer() {};
|
||||
|
||||
virtual void push(const std::string &s) = 0;
|
||||
virtual void pop(void) =0;
|
||||
virtual void write( const std::string& s,const std::string &output ) =0;
|
||||
virtual void write( const std::string& s,const int16_t output ) =0;
|
||||
virtual void write( const std::string& s,const uint16_t output ) =0;
|
||||
virtual void write( const std::string& s,const int32_t output ) =0;
|
||||
virtual void write( const std::string& s,const uint32_t output ) =0;
|
||||
virtual void write( const std::string& s,const int64_t output ) =0;
|
||||
virtual void write( const std::string& s,const uint64_t output ) =0;
|
||||
virtual void write( const std::string& s,const float output ) =0;
|
||||
virtual void write( const std::string& s,const double output ) =0;
|
||||
virtual void write( const std::string& s,const bool output ) =0;
|
||||
|
||||
};
|
||||
|
||||
class Reader {
|
||||
public:
|
||||
|
||||
|
||||
|
||||
virtual ~Reader() {};
|
||||
|
||||
virtual void read( const std::string& s,std::string &output ) =0;
|
||||
virtual void push(const std::string &s) =0;
|
||||
virtual void pop(void) = 0;
|
||||
|
||||
virtual void read( const std::string& s, int16_t &output ) =0;
|
||||
virtual void read( const std::string& s, uint16_t &output ) =0;
|
||||
virtual void read( const std::string& s, int32_t &output ) =0;
|
||||
virtual void read( const std::string& s, uint32_t &output ) =0;
|
||||
virtual void read( const std::string& s, int64_t &output ) =0;
|
||||
virtual void read( const std::string& s, uint64_t &output ) =0;
|
||||
virtual void read( const std::string& s, float &output ) =0;
|
||||
virtual void read( const std::string& s, double &output ) =0;
|
||||
virtual void read( const std::string& s, bool &output ) =0;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
109
lib/serialisation/BinaryIO.h
Normal file
109
lib/serialisation/BinaryIO.h
Normal file
@ -0,0 +1,109 @@
|
||||
#ifndef GRID_SERIALISATION_BINARY_READER_H
|
||||
#define GRID_SERIALISATION_BINARY_READER_H
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class BinaryWriter : public Writer {
|
||||
private:
|
||||
|
||||
std::ofstream file;
|
||||
|
||||
public:
|
||||
|
||||
BinaryWriter(const std::string &_file) : file(_file,std::ios::binary|std::ios::out) {}
|
||||
|
||||
~BinaryWriter() {}
|
||||
|
||||
// Binary is scopeless
|
||||
void push(const std::string &s) {}
|
||||
void pop(void) {}
|
||||
|
||||
void write(const std::string& s,const std::string &output) {
|
||||
uint32_t sz = output.size();
|
||||
write(s,sz);
|
||||
const char * cstr = output.c_str();
|
||||
for(int c=0;c<output.size();c++){
|
||||
write(s,cstr[c]);
|
||||
}
|
||||
};
|
||||
void write( const std::string& s,const char output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const int16_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint16_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const int32_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint32_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const int64_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint64_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const float output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const double output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const bool output ) { writeInternal(s,output); };
|
||||
|
||||
private:
|
||||
template<class T> void writeInternal( const std::string& s,const T output ){
|
||||
// FIXME --- htons, htonl, htno64 etc..
|
||||
file.write((char *)&output,sizeof(T));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
class BinaryReader : public Reader{
|
||||
private:
|
||||
|
||||
std::ifstream file;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
BinaryReader(const std::string &_file) : file(_file,std::ios::binary|std::ios::in) {}
|
||||
|
||||
~BinaryReader() {}
|
||||
|
||||
// Binary is scopeless
|
||||
void push(const std::string &s) { }
|
||||
void pop(void) { }
|
||||
|
||||
void read( const std::string& s,std::string &output ) {
|
||||
|
||||
output.clear();
|
||||
|
||||
uint32_t sz;
|
||||
file.read((char *)&sz,sizeof(sz));
|
||||
|
||||
for(int c=0;c<sz;c++){
|
||||
char ch;
|
||||
file.read(&ch,sizeof(ch));
|
||||
output.push_back(ch);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
void read( const std::string& s, int16_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint16_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, int32_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint32_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, int64_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint64_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, float &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, double &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, bool &output ) { readInternal(s,output); };
|
||||
|
||||
|
||||
private:
|
||||
|
||||
template<class T> void readInternal( const std::string& path, T &output ){
|
||||
file.read((char *)&output,sizeof(output)); // byte order??
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -1,6 +1,47 @@
|
||||
#ifndef GRID_MACRO_MAGIC_H
|
||||
#define GRID_MACRO_MAGIC_H
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Strong credit to :
|
||||
//
|
||||
// http://jhnet.co.uk/articles/cpp_magic
|
||||
//
|
||||
//
|
||||
// "The C Pre-Processor (CPP) is the somewhat basic macro system used by the C
|
||||
// programming language to implement features such as #include and #define
|
||||
// which allow very simple text-substitutions to be carried out at compile time.
|
||||
// In this article we abuse the humble #define to implement if-statements and iteration.
|
||||
//
|
||||
// Before we begin, a disclaimer: these tricks, while perfectly valid C, should not be
|
||||
// considered good development practice and should almost certainly not be used for "real work".
|
||||
// That said it can totally be used for fun home-automation projects...
|
||||
//
|
||||
// https://github.com/18sg/uSHET/blob/master/lib/cpp_magic.h
|
||||
//
|
||||
// The cpp_magic.h license (prior to my modifications):
|
||||
/*
|
||||
Copyright (c) 2014 Thomas Nixon, Jonathan Heathcote
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
///////////////////////////////////////////
|
||||
|
||||
#define strong_inline __attribute__((always_inline)) inline
|
||||
|
||||
#ifndef MAX
|
||||
@ -63,9 +104,15 @@
|
||||
|
||||
#define _GRID_MACRO_MAP() GRID_MACRO_MAP
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// These are the Grid extensions to serialisation (beyond popping first AND second)
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define GRID_MACRO_MEMBER(A,B) A B;
|
||||
|
||||
#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. B <<" ; " <<std::endl;
|
||||
#define GRID_MACRO_READ_MEMBER(A,B) read(RD,#B,obj. B);
|
||||
#define GRID_MACRO_WRITE_MEMBER(A,B) write(WR,#B,obj. B);
|
||||
|
||||
#define GRID_DECL_CLASS_MEMBERS(cname,...) \
|
||||
\
|
||||
@ -73,11 +120,26 @@
|
||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__)) \
|
||||
\
|
||||
\
|
||||
friend std::ostream & operator << (std::ostream &os, const cname &obj ) { \
|
||||
friend void write(Writer &WR,const std::string &s, const cname &obj){ \
|
||||
push(WR,s);\
|
||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
|
||||
pop(WR);\
|
||||
} \
|
||||
\
|
||||
\
|
||||
friend void read(Reader &RD,const std::string &s, cname &obj){ \
|
||||
push(RD,s);\
|
||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \
|
||||
pop(RD);\
|
||||
} \
|
||||
\
|
||||
\
|
||||
friend std::ostream & operator << (std::ostream &os, const cname &obj ) { \
|
||||
os<<"class "<<#cname<<" {"<<std::endl;\
|
||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_OS_WRITE_MEMBER,__VA_ARGS__)) \
|
||||
os<<"}"; \
|
||||
return os;\
|
||||
};
|
||||
|
||||
|
||||
#endif
|
103
lib/serialisation/Serialisation.h
Normal file
103
lib/serialisation/Serialisation.h
Normal file
@ -0,0 +1,103 @@
|
||||
#ifndef GRID_SERIALISATION_READER_H
|
||||
#define GRID_SERIALISATION_READER_H
|
||||
|
||||
#include <serialisation/MacroMagic.h>
|
||||
#include <serialisation/BaseIO.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
// Generic reader writer interface
|
||||
inline void push(Writer & WR,const std::string &s) { WR.push(s);}
|
||||
inline void push(Writer & WR,const char *s) { WR.push(std::string(s));}
|
||||
inline void pop (Writer & WR) { WR.pop();}
|
||||
|
||||
// inline void write(Writer& wr, const std::string& s,const char * output ) { wr.write(s,std::string(output)); };
|
||||
inline void write(Writer& wr, const std::string& s,const std::string &output) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const int16_t output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const uint16_t output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const int32_t output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const uint32_t output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const int64_t output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const uint64_t output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const float output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const double output ) { wr.write(s,output); };
|
||||
inline void write(Writer& wr, const std::string& s,const bool output ) { wr.write(s,output); };
|
||||
|
||||
inline void push(Reader & WR,const std::string &s) { WR.push(s);}
|
||||
inline void push(Reader & WR,const char *s) { WR.push(std::string(s));}
|
||||
inline void pop (Reader & WR) { WR.pop();}
|
||||
|
||||
inline void read(Reader& rd, const std::string& s,std::string &output) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, int16_t &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, uint16_t &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, int32_t &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, uint32_t &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, int64_t &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, uint64_t &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, float &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, double &output ) { rd.read(s,output); };
|
||||
inline void read(Reader& rd, const std::string& s, bool &output ) { rd.read(s,output); };
|
||||
|
||||
|
||||
template<class T> void write(Writer& wr, const std::string& s,const std::vector<T> output ) {
|
||||
push(wr,s);
|
||||
uint64_t sz =output.size();
|
||||
write(wr,"N",sz);
|
||||
for(int i=0;i<output.size();i++){
|
||||
std::ostringstream oss; oss << "elem" << i;
|
||||
write(wr,oss.str(),output[i]);
|
||||
}
|
||||
pop(wr);
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<class T>
|
||||
void read(Reader& rd, const std::string& s,std::vector<T> &output ) {
|
||||
|
||||
push(rd,s);
|
||||
|
||||
uint64_t sz; read(rd,"N",sz);
|
||||
// skip the vector length
|
||||
T tmp;
|
||||
output.resize(0);
|
||||
for(int i=0;i<sz;i++){
|
||||
std::ostringstream oss; oss << "elem" << i;
|
||||
read(rd,oss.str(),tmp);
|
||||
output.push_back(tmp);
|
||||
}
|
||||
pop(rd);
|
||||
};
|
||||
|
||||
template < class T >
|
||||
inline std::ostream& operator << (std::ostream& os, const std::vector<T>& v)
|
||||
{
|
||||
os << "[";
|
||||
for (typename std::vector<T>::const_iterator ii = v.begin(); ii != v.end(); ++ii)
|
||||
{
|
||||
os << " " << *ii;
|
||||
}
|
||||
os << " ]";
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Todo:
|
||||
//////////////////////////////////////////
|
||||
#include <serialisation/BinaryIO.h>
|
||||
#include <serialisation/TextIO.h>
|
||||
//#include <serialisation/JsonIO.h>
|
||||
//#include <serialisation/YamlIO.h>
|
||||
#include <serialisation/XmlIO.h>
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Select the default serialiser use ifdef's
|
||||
//////////////////////////////////////////
|
||||
namespace Grid {
|
||||
typedef XMLReader DefaultReader;
|
||||
typedef XMLWriter DefaultWriter;
|
||||
}
|
||||
#endif
|
148
lib/serialisation/TextIO.h
Normal file
148
lib/serialisation/TextIO.h
Normal file
@ -0,0 +1,148 @@
|
||||
#ifndef GRID_SERIALISATION_TEXT_READER_H
|
||||
#define GRID_SERIALISATION_TEXT_READER_H
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class TextWriter : public Writer {
|
||||
private:
|
||||
|
||||
std::ofstream file;
|
||||
int level;
|
||||
void indent(void) {
|
||||
for(int i=0;i<level;i++){
|
||||
file <<"\t";
|
||||
}
|
||||
}
|
||||
public:
|
||||
|
||||
TextWriter(const std::string &_file) : file(_file,std::ios::out) {
|
||||
level=0;
|
||||
}
|
||||
|
||||
~TextWriter() { }
|
||||
|
||||
void push(const std::string &s)
|
||||
{
|
||||
// std::string tmp = s;
|
||||
// write(s,tmp);
|
||||
level++;
|
||||
}
|
||||
void pop(void) {
|
||||
level--;
|
||||
}
|
||||
|
||||
void write( const std::string& s,const std::string &output ) {
|
||||
indent();
|
||||
file<<output<<std::endl;
|
||||
};
|
||||
void write( const std::string& s,const int16_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint16_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const int32_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint32_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const int64_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint64_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const float output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const double output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const bool output ) { writeInternal(s,output); };
|
||||
|
||||
private:
|
||||
|
||||
template<class T> void writeInternal( const std::string& s,const T output ){
|
||||
indent();
|
||||
file << std::boolalpha << output<<std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class TextReader : public Reader {
|
||||
private:
|
||||
|
||||
std::ifstream file;
|
||||
int level;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
TextReader(const std::string &_file) : file(_file,std::ios::in) { level = 0;};
|
||||
|
||||
~TextReader() { }
|
||||
|
||||
void read( const std::string& s,std::string &output ) {
|
||||
char c='a';
|
||||
for(int i=0;i<level;i++){
|
||||
file.get(c);
|
||||
if ( c != '\t' )
|
||||
std::cout << "mismatch on tab "<<c<<" level "<< level<< " i "<< i<<std::endl;
|
||||
}
|
||||
output.clear();
|
||||
std::getline(file,output);
|
||||
};
|
||||
void push(const std::string &s) {
|
||||
// std::string tmp; read(s,tmp);
|
||||
level++;
|
||||
}
|
||||
void pop(void) { level--; }
|
||||
|
||||
template<class T>
|
||||
void read( const std::string& s, std::vector<T> &output ) {
|
||||
|
||||
push(s);
|
||||
|
||||
uint64_t n; read("N",n);
|
||||
|
||||
// skip the vector length
|
||||
T tmp;
|
||||
output.resize(0);
|
||||
for(int i=0;i<n;i++){
|
||||
std::ostringstream oss; oss << "elem" << i;
|
||||
read(*this,oss.str(),tmp);
|
||||
output.push_back(tmp);
|
||||
}
|
||||
|
||||
pop();
|
||||
|
||||
};
|
||||
|
||||
void read( const std::string& s, int16_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint16_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, int32_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint32_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, int64_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint64_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, float &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, double &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, bool &output ) { readInternal(s,output); };
|
||||
|
||||
|
||||
private:
|
||||
|
||||
template<class T> void readInternal( const std::string& path, T &output ){
|
||||
std::string asString;
|
||||
read(path,asString);
|
||||
convert(asString,output);
|
||||
}
|
||||
|
||||
template<class T> void convert(const std::string &asString,T &output)
|
||||
{
|
||||
std::istringstream is(asString); is.exceptions(std::ios::failbit);
|
||||
try {
|
||||
is >> std::boolalpha >> output;
|
||||
} catch(std::istringstream::failure e) {
|
||||
std::cerr << "XML read failure on "<<" "<<asString<<" "<<typeid(T).name()<<std::endl;
|
||||
}
|
||||
assert( is.tellg()==-1);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
146
lib/serialisation/XmlIO.h
Normal file
146
lib/serialisation/XmlIO.h
Normal file
@ -0,0 +1,146 @@
|
||||
#ifndef GRID_SERIALISATION_XML_READER_H
|
||||
#define GRID_SERIALISATION_XML_READER_H
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
#include "pugixml/pugixml.h"
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class XMLWriter : public Writer {
|
||||
private:
|
||||
|
||||
pugi::xml_document doc;
|
||||
pugi::xml_node node;
|
||||
std::string file;
|
||||
|
||||
public:
|
||||
|
||||
XMLWriter(const std::string &_file) : file(_file)
|
||||
{
|
||||
node=doc.append_child();
|
||||
node.set_name("document");
|
||||
}
|
||||
|
||||
~XMLWriter()
|
||||
{
|
||||
// simple_walker walker;
|
||||
// doc.traverse(walker);
|
||||
|
||||
doc.save_file(file.c_str()," ");
|
||||
}
|
||||
|
||||
void push(const std::string &s)
|
||||
{
|
||||
node = node.append_child(s.c_str());
|
||||
}
|
||||
void pop(void) {
|
||||
node = node.parent();
|
||||
}
|
||||
|
||||
void write( const std::string& s,const std::string &output ) {
|
||||
pugi::xml_node leaf=node.append_child(s.c_str());
|
||||
leaf.append_child(pugi::node_pcdata).set_value(output.c_str());
|
||||
};
|
||||
|
||||
void write( const std::string& s,const int16_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint16_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const int32_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint32_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const int64_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const uint64_t output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const float output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const double output ) { writeInternal(s,output); };
|
||||
void write( const std::string& s,const bool output ) { writeInternal(s,output); };
|
||||
|
||||
private:
|
||||
|
||||
template<class T> void writeInternal( const std::string& s,const T output ){
|
||||
std::ostringstream os;
|
||||
os << std::boolalpha << output;
|
||||
write(s,os.str());
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
class XMLReader : public Reader {
|
||||
private:
|
||||
|
||||
pugi::xml_document doc;
|
||||
pugi::xml_node node;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
XMLReader(const std::string &_file)
|
||||
{
|
||||
pugi::xml_parse_result result = doc.load_file(_file.c_str());
|
||||
|
||||
if ( !result ) {
|
||||
std::cout << "XML error description: " << result.description() << "\n";
|
||||
std::cout << "XML error offset: " << result.offset << "\n";
|
||||
}
|
||||
|
||||
assert(result);
|
||||
|
||||
// simple_walker walker;
|
||||
// doc.traverse(walker);
|
||||
|
||||
node= doc.child("document");
|
||||
}
|
||||
|
||||
~XMLReader() { }
|
||||
|
||||
void read( const std::string& s,std::string &output ) {
|
||||
output=node.child(s.c_str()).first_child().value();
|
||||
};
|
||||
void push(const std::string &s)
|
||||
{
|
||||
node = node.child(s.c_str());
|
||||
}
|
||||
void pop(void) {
|
||||
node = node.parent();
|
||||
}
|
||||
|
||||
void read( const std::string& s, int16_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint16_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, int32_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint32_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, int64_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, uint64_t &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, float &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, double &output ) { readInternal(s,output); };
|
||||
void read( const std::string& s, bool &output ) { readInternal(s,output); };
|
||||
|
||||
|
||||
private:
|
||||
|
||||
template<class T> void readInternal( const std::string& path, T &output ){
|
||||
std::string asString;
|
||||
read(path,asString);
|
||||
convert(asString,output);
|
||||
}
|
||||
|
||||
template<class T> void convert(const std::string &asString,T &output)
|
||||
{
|
||||
std::istringstream is(asString); is.exceptions(std::ios::failbit);
|
||||
try {
|
||||
is >> std::boolalpha >> output;
|
||||
} catch(std::istringstream::failure e) {
|
||||
std::cerr << "XML read failure on "<<" "<<asString<<" "<<typeid(T).name()<<std::endl;
|
||||
}
|
||||
assert( is.tellg()==-1);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -183,11 +183,11 @@ namespace Optimization {
|
||||
// Complex float
|
||||
inline __m256 operator()(__m256 a, __m256 b){
|
||||
__m256 ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm256_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
||||
ymm0 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
|
||||
ymm0 = _mm256_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||
// FIXME AVX2 could MAC
|
||||
ymm1 = _mm256_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
||||
ymm2 = _mm256_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
||||
ymm1 = _mm256_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi
|
||||
ymm2 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai
|
||||
ymm1 = _mm256_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
return _mm256_addsub_ps(ymm0,ymm1);
|
||||
}
|
||||
@ -270,7 +270,7 @@ namespace Optimization {
|
||||
//Complex single
|
||||
inline __m256 operator()(__m256 in, __m256 ret){
|
||||
__m256 tmp =_mm256_addsub_ps(_mm256_setzero_ps(),in); // r,-i
|
||||
return _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r
|
||||
return _mm256_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,0,1)); //-i,r
|
||||
}
|
||||
//Complex double
|
||||
inline __m256d operator()(__m256d in, __m256d ret){
|
||||
@ -282,7 +282,7 @@ namespace Optimization {
|
||||
struct TimesI{
|
||||
//Complex single
|
||||
inline __m256 operator()(__m256 in, __m256 ret){
|
||||
__m256 tmp =_mm256_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1)); // i,r
|
||||
__m256 tmp =_mm256_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // i,r
|
||||
return _mm256_addsub_ps(_mm256_setzero_ps(),tmp); // i,-r
|
||||
}
|
||||
//Complex double
|
||||
@ -296,27 +296,44 @@ namespace Optimization {
|
||||
// Some Template specialization
|
||||
//////////////////////////////////////////////
|
||||
|
||||
template < typename vtype >
|
||||
void permute(vtype &a,vtype b, int perm) {
|
||||
uconv<vtype> conv;
|
||||
conv.v = b;
|
||||
switch (perm){
|
||||
// 8x32 bits=>3 permutes
|
||||
case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
||||
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
||||
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
a = conv.v;
|
||||
}
|
||||
struct Permute{
|
||||
|
||||
static inline __m256 Permute0(__m256 in){
|
||||
return _mm256_permute2f128_ps(in,in,0x01);
|
||||
};
|
||||
static inline __m256 Permute1(__m256 in){
|
||||
return _mm256_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
};
|
||||
static inline __m256 Permute2(__m256 in){
|
||||
return _mm256_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
};
|
||||
static inline __m256 Permute3(__m256 in){
|
||||
return in;
|
||||
};
|
||||
|
||||
static inline __m256d Permute0(__m256d in){
|
||||
return _mm256_permute2f128_pd(in,in,0x01);
|
||||
};
|
||||
static inline __m256d Permute1(__m256d in){
|
||||
return _mm256_shuffle_pd(in,in,0x5);
|
||||
};
|
||||
static inline __m256d Permute2(__m256d in){
|
||||
return in;
|
||||
};
|
||||
static inline __m256d Permute3(__m256d in){
|
||||
return in;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){
|
||||
__m256 v1,v2;
|
||||
Optimization::permute(v1,in,0); // avx 256; quad complex single
|
||||
v1 = _mm256_add_ps(v1,in);
|
||||
Optimization::permute(v2,v1,1);
|
||||
v1=Optimization::Permute::Permute0(in); // avx 256; quad complex single
|
||||
v1= _mm256_add_ps(v1,in);
|
||||
v2=Optimization::Permute::Permute1(v1);
|
||||
v1 = _mm256_add_ps(v1,v2);
|
||||
u256f conv; conv.v = v1;
|
||||
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||
@ -326,11 +343,11 @@ namespace Optimization {
|
||||
template<>
|
||||
inline Grid::RealF Reduce<Grid::RealF, __m256>::operator()(__m256 in){
|
||||
__m256 v1,v2;
|
||||
Optimization::permute(v1,in,0); // avx 256; octo-double
|
||||
v1 = Optimization::Permute::Permute0(in); // avx 256; octo-double
|
||||
v1 = _mm256_add_ps(v1,in);
|
||||
Optimization::permute(v2,v1,1);
|
||||
v2 = Optimization::Permute::Permute1(v1);
|
||||
v1 = _mm256_add_ps(v1,v2);
|
||||
Optimization::permute(v2,v1,2);
|
||||
v2 = Optimization::Permute::Permute2(v1);
|
||||
v1 = _mm256_add_ps(v1,v2);
|
||||
u256f conv; conv.v=v1;
|
||||
return conv.f[0];
|
||||
@ -341,7 +358,7 @@ namespace Optimization {
|
||||
template<>
|
||||
inline Grid::ComplexD Reduce<Grid::ComplexD, __m256d>::operator()(__m256d in){
|
||||
__m256d v1;
|
||||
Optimization::permute(v1,in,0); // sse 128; paired complex single
|
||||
v1 = Optimization::Permute::Permute0(in); // sse 128; paired complex single
|
||||
v1 = _mm256_add_pd(v1,in);
|
||||
u256d conv; conv.v = v1;
|
||||
return Grid::ComplexD(conv.f[0],conv.f[1]);
|
||||
@ -351,9 +368,9 @@ namespace Optimization {
|
||||
template<>
|
||||
inline Grid::RealD Reduce<Grid::RealD, __m256d>::operator()(__m256d in){
|
||||
__m256d v1,v2;
|
||||
Optimization::permute(v1,in,0); // avx 256; quad double
|
||||
v1 = Optimization::Permute::Permute0(in); // avx 256; quad double
|
||||
v1 = _mm256_add_pd(v1,in);
|
||||
Optimization::permute(v2,v1,1);
|
||||
v2 = Optimization::Permute::Permute1(v1);
|
||||
v1 = _mm256_add_pd(v1,v2);
|
||||
u256d conv; conv.v = v1;
|
||||
return conv.f[0];
|
||||
@ -387,13 +404,6 @@ namespace Grid {
|
||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template < typename VectorSIMD >
|
||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
||||
Optimization::permute(y.v,b.v,perm);
|
||||
};
|
||||
|
||||
// Function name aliases
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
typedef Optimization::Vstore VstoreSIMD;
|
||||
|
@ -149,49 +149,33 @@ namespace Optimization {
|
||||
}
|
||||
};
|
||||
|
||||
// Note, we can beat the shuf overhead in chain with two temporaries
|
||||
// Ar Ai , Br Bi, Ai Ar // one shuf
|
||||
//tmpr Ar Br, Ai Bi // Mul/Mac/Mac
|
||||
//tmpi Br Ai, Bi Ar // Mul/Mac/Mac
|
||||
// add tmpi,shuf(tmpi)
|
||||
// sub tmpr,shuf(tmpi)
|
||||
// shuf(tmpr,tmpi). // Could drop/trade for write mask
|
||||
|
||||
// Gives
|
||||
// 2mul,4 mac +add+sub = 8 flop type insns
|
||||
// 3shuf + 2 (+shuf) = 5/6 simd perm and 1/2 the load.
|
||||
|
||||
struct MultComplex{
|
||||
// Complex float
|
||||
inline __m512 operator()(__m512 a, __m512 b){
|
||||
__m512 vzero,ymm0,ymm1,real, imag;
|
||||
vzero = _mm512_setzero_ps();
|
||||
ymm0 = _mm512_swizzle_ps(a, _MM_SWIZ_REG_CDAB); //
|
||||
real = (__m512)_mm512_mask_or_epi32((__m512i)a, 0xAAAA,(__m512i)vzero,(__m512i)ymm0);
|
||||
imag = _mm512_mask_sub_ps(a, 0x5555,vzero, ymm0);
|
||||
ymm1 = _mm512_mul_ps(real, b);
|
||||
ymm0 = _mm512_swizzle_ps(b, _MM_SWIZ_REG_CDAB); // OK
|
||||
return _mm512_fmadd_ps(ymm0,imag,ymm1);
|
||||
// dup, dup, perm, mul, madd
|
||||
__m512 a_real = _mm512_moveldup_ps( a ); // Ar Ar
|
||||
__m512 a_imag = _mm512_movehdup_ps( a ); // Ai Ai
|
||||
a_imag = _mm512_mul_ps( a_imag, _mm512_permute_ps( b, 0xB1 ) ); // (Ai, Ai) * (Bi, Br) = Ai Bi, Ai Br
|
||||
return _mm512_fmaddsub_ps( a_real, b, a_imag ); // Ar Br , Ar Bi +- Ai Bi = ArBr-AiBi , ArBi+AiBr
|
||||
}
|
||||
// Complex double
|
||||
inline __m512d operator()(__m512d a, __m512d b){
|
||||
/* This is from
|
||||
* Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets
|
||||
* @inproceedings{McFarlin:2011:ASV:1995896.1995938,
|
||||
* author = {McFarlin, Daniel S. and Arbatov, Volodymyr and Franchetti, Franz and P\"{u}schel, Markus},
|
||||
* title = {Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets},
|
||||
* booktitle = {Proceedings of the International Conference on Supercomputing},
|
||||
* series = {ICS '11},
|
||||
* year = {2011},
|
||||
* isbn = {978-1-4503-0102-2},
|
||||
* location = {Tucson, Arizona, USA},
|
||||
* pages = {265--274},
|
||||
* numpages = {10},
|
||||
* url = {http://doi.acm.org/10.1145/1995896.1995938},
|
||||
* doi = {10.1145/1995896.1995938},
|
||||
* acmid = {1995938},
|
||||
* publisher = {ACM},
|
||||
* address = {New York, NY, USA},
|
||||
* keywords = {autovectorization, fourier transform, program generation, simd, super-optimization},
|
||||
* }
|
||||
*/
|
||||
__m512d vzero,ymm0,ymm1,real,imag;
|
||||
vzero =_mm512_setzero_pd();
|
||||
ymm0 = _mm512_swizzle_pd(a, _MM_SWIZ_REG_CDAB); //
|
||||
real =(__m512d)_mm512_mask_or_epi64((__m512i)a, 0xAA,(__m512i)vzero,(__m512i) ymm0);
|
||||
imag = _mm512_mask_sub_pd(a, 0x55,vzero, ymm0);
|
||||
ymm1 = _mm512_mul_pd(real, b);
|
||||
ymm0 = _mm512_swizzle_pd(b, _MM_SWIZ_REG_CDAB); // OK
|
||||
return _mm512_fmadd_pd(ymm0,imag,ymm1);
|
||||
__m512d a_real = _mm512_shuffle_pd( a, a, 0x00 );
|
||||
__m512d a_imag = _mm512_shuffle_pd( a, a, 0xFF );
|
||||
a_imag = _mm512_mul_pd( a_imag, _mm512_permute_pd( b, 0x55 ) );
|
||||
return _mm512_fmaddsub_pd( a_real, b, a_imag );
|
||||
}
|
||||
};
|
||||
|
||||
@ -227,27 +211,25 @@ namespace Optimization {
|
||||
//Complex single
|
||||
inline __m512 operator()(__m512 in, __m512 ret){
|
||||
__m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag
|
||||
return _mm512_swizzle_ps(tmp, _MM_SWIZ_REG_CDAB);// OK
|
||||
return _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(1,0,3,2)); // 0x4E??
|
||||
}
|
||||
//Complex double
|
||||
inline __m512d operator()(__m512d in, __m512d ret){
|
||||
__m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag
|
||||
return _mm512_swizzle_pd(tmp, _MM_SWIZ_REG_CDAB);// OK
|
||||
}
|
||||
|
||||
|
||||
return _mm512_shuffle_pd(tmp,tmp,0x55);
|
||||
}
|
||||
};
|
||||
|
||||
struct TimesI{
|
||||
//Complex single
|
||||
inline __m512 operator()(__m512 in, __m512 ret){
|
||||
__m512 tmp = _mm512_swizzle_ps(in, _MM_SWIZ_REG_CDAB);// OK
|
||||
return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp); // real -imag
|
||||
__m512 tmp = _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp);
|
||||
}
|
||||
//Complex double
|
||||
inline __m512d operator()(__m512d in, __m512d ret){
|
||||
__m512d tmp = _mm512_swizzle_pd(in, _MM_SWIZ_REG_CDAB);// OK
|
||||
return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp); // real -imag
|
||||
__m512d tmp = _mm512_shuffle_pd(tmp,tmp,0x55);
|
||||
return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp);
|
||||
}
|
||||
|
||||
|
||||
@ -255,6 +237,36 @@ namespace Optimization {
|
||||
|
||||
|
||||
|
||||
// Gpermute utilities consider coalescing into 1 Gpermute
|
||||
struct Permute{
|
||||
|
||||
static inline __m512 Permute0(__m512 in){
|
||||
return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
};
|
||||
static inline __m512 Permute1(__m512 in){
|
||||
return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
};
|
||||
static inline __m512 Permute2(__m512 in){
|
||||
return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
};
|
||||
static inline __m512 Permute3(__m512 in){
|
||||
return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
};
|
||||
|
||||
static inline __m512d Permute0(__m512d in){
|
||||
return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
};
|
||||
static inline __m512d Permute1(__m512d in){
|
||||
return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
};
|
||||
static inline __m512d Permute2(__m512d in){
|
||||
return _mm512_shuffle_pd(in,in,0x55);
|
||||
};
|
||||
static inline __m512d Permute3(__m512d in){
|
||||
return in;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
@ -314,25 +326,6 @@ namespace Grid {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Gpermute utilities consider coalescing into 1 Gpermute
|
||||
template < typename VectorSIMD >
|
||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
||||
union {
|
||||
__m512 f;
|
||||
decltype(VectorSIMD::v) v;
|
||||
} conv;
|
||||
conv.v = b.v;
|
||||
switch(perm){
|
||||
case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
|
||||
case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
|
||||
case 1 : conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
|
||||
case 0 : conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
y.v=conv.v;
|
||||
};
|
||||
|
||||
// Function name aliases
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
|
365
lib/simd/Grid_imci.h
Normal file
365
lib/simd/Grid_imci.h
Normal file
@ -0,0 +1,365 @@
|
||||
//----------------------------------------------------------------------
|
||||
/*! @file Grid_knc.h
|
||||
@brief Optimization libraries for AVX512 instructions set for KNC
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-06-09 14:27:28 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
//#ifndef KNC_ONLY_STORES
|
||||
//#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
|
||||
//#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
|
||||
//#endif
|
||||
|
||||
|
||||
namespace Optimization {
|
||||
|
||||
struct Vsplat{
|
||||
//Complex float
|
||||
inline __m512 operator()(float a, float b){
|
||||
return _mm512_set_ps(b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a);
|
||||
}
|
||||
// Real float
|
||||
inline __m512 operator()(float a){
|
||||
return _mm512_set1_ps(a);
|
||||
}
|
||||
//Complex double
|
||||
inline __m512d operator()(double a, double b){
|
||||
return _mm512_set_pd(b,a,b,a,b,a,b,a);
|
||||
}
|
||||
//Real double
|
||||
inline __m512d operator()(double a){
|
||||
return _mm512_set1_pd(a);
|
||||
}
|
||||
//Integer
|
||||
inline __m512i operator()(Integer a){
|
||||
return _mm512_set1_epi32(a);
|
||||
}
|
||||
};
|
||||
|
||||
struct Vstore{
|
||||
//Float
|
||||
inline void operator()(__m512 a, float* F){
|
||||
_mm512_store_ps(F,a);
|
||||
}
|
||||
//Double
|
||||
inline void operator()(__m512d a, double* D){
|
||||
_mm512_store_pd(D,a);
|
||||
}
|
||||
//Integer
|
||||
inline void operator()(__m512i a, Integer* I){
|
||||
_mm512_store_si512((__m512i *)I,a);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
struct Vstream{
|
||||
//Float
|
||||
inline void operator()(float * a, __m512 b){
|
||||
_mm512_storenrngo_ps(a,b);
|
||||
}
|
||||
//Double
|
||||
inline void operator()(double * a, __m512d b){
|
||||
_mm512_storenrngo_pd(a,b);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct Vset{
|
||||
// Complex float
|
||||
inline __m512 operator()(Grid::ComplexF *a){
|
||||
return _mm512_set_ps(a[7].imag(),a[7].real(),a[6].imag(),a[6].real(),
|
||||
a[5].imag(),a[5].real(),a[4].imag(),a[4].real(),
|
||||
a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),
|
||||
a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||
}
|
||||
// Complex double
|
||||
inline __m512d operator()(Grid::ComplexD *a){
|
||||
return _mm512_set_pd(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),
|
||||
a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||
}
|
||||
// Real float
|
||||
inline __m512 operator()(float *a){
|
||||
return _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
||||
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
}
|
||||
// Real double
|
||||
inline __m512d operator()(double *a){
|
||||
return _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
}
|
||||
// Integer
|
||||
inline __m512i operator()(Integer *a){
|
||||
return _mm512_set_epi32( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
||||
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <typename Out_type, typename In_type>
|
||||
struct Reduce{
|
||||
//Need templated class to overload output type
|
||||
//General form must generate error if compiled
|
||||
inline Out_type operator()(In_type in){
|
||||
printf("Error, using wrong Reduce function\n");
|
||||
exit(1);
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Arithmetic operations
|
||||
/////////////////////////////////////////////////////
|
||||
struct Sum{
|
||||
//Complex/Real float
|
||||
inline __m512 operator()(__m512 a, __m512 b){
|
||||
return _mm512_add_ps(a,b);
|
||||
}
|
||||
//Complex/Real double
|
||||
inline __m512d operator()(__m512d a, __m512d b){
|
||||
return _mm512_add_pd(a,b);
|
||||
}
|
||||
//Integer
|
||||
inline __m512i operator()(__m512i a, __m512i b){
|
||||
return _mm512_add_epi32(a,b);
|
||||
}
|
||||
};
|
||||
|
||||
struct Sub{
|
||||
//Complex/Real float
|
||||
inline __m512 operator()(__m512 a, __m512 b){
|
||||
return _mm512_sub_ps(a,b);
|
||||
}
|
||||
//Complex/Real double
|
||||
inline __m512d operator()(__m512d a, __m512d b){
|
||||
return _mm512_sub_pd(a,b);
|
||||
}
|
||||
//Integer
|
||||
inline __m512i operator()(__m512i a, __m512i b){
|
||||
return _mm512_sub_epi32(a,b);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct MultComplex{
|
||||
// Complex float
|
||||
inline __m512 operator()(__m512 a, __m512 b){
|
||||
__m512 vzero,ymm0,ymm1,real, imag;
|
||||
vzero = _mm512_setzero_ps();
|
||||
ymm0 = _mm512_swizzle_ps(a, _MM_SWIZ_REG_CDAB); //
|
||||
real = (__m512)_mm512_mask_or_epi32((__m512i)a, 0xAAAA,(__m512i)vzero,(__m512i)ymm0);
|
||||
imag = _mm512_mask_sub_ps(a, 0x5555,vzero, ymm0);
|
||||
ymm1 = _mm512_mul_ps(real, b);
|
||||
ymm0 = _mm512_swizzle_ps(b, _MM_SWIZ_REG_CDAB); // OK
|
||||
return _mm512_fmadd_ps(ymm0,imag,ymm1);
|
||||
}
|
||||
// Complex double
|
||||
inline __m512d operator()(__m512d a, __m512d b){
|
||||
/* This is from
|
||||
* Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets
|
||||
* @inproceedings{McFarlin:2011:ASV:1995896.1995938,
|
||||
* author = {McFarlin, Daniel S. and Arbatov, Volodymyr and Franchetti, Franz and P\"{u}schel, Markus},
|
||||
* title = {Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets},
|
||||
* booktitle = {Proceedings of the International Conference on Supercomputing},
|
||||
* series = {ICS '11},
|
||||
* year = {2011},
|
||||
* isbn = {978-1-4503-0102-2},
|
||||
* location = {Tucson, Arizona, USA},
|
||||
* pages = {265--274},
|
||||
* numpages = {10},
|
||||
* url = {http://doi.acm.org/10.1145/1995896.1995938},
|
||||
* doi = {10.1145/1995896.1995938},
|
||||
* acmid = {1995938},
|
||||
* publisher = {ACM},
|
||||
* address = {New York, NY, USA},
|
||||
* keywords = {autovectorization, fourier transform, program generation, simd, super-optimization},
|
||||
* }
|
||||
*/
|
||||
__m512d vzero,ymm0,ymm1,real,imag;
|
||||
vzero =_mm512_setzero_pd();
|
||||
ymm0 = _mm512_swizzle_pd(a, _MM_SWIZ_REG_CDAB); //
|
||||
real =(__m512d)_mm512_mask_or_epi64((__m512i)a, 0xAA,(__m512i)vzero,(__m512i) ymm0);
|
||||
imag = _mm512_mask_sub_pd(a, 0x55,vzero, ymm0);
|
||||
ymm1 = _mm512_mul_pd(real, b);
|
||||
ymm0 = _mm512_swizzle_pd(b, _MM_SWIZ_REG_CDAB); // OK
|
||||
return _mm512_fmadd_pd(ymm0,imag,ymm1);
|
||||
}
|
||||
};
|
||||
|
||||
struct Mult{
|
||||
// Real float
|
||||
inline __m512 operator()(__m512 a, __m512 b){
|
||||
return _mm512_mul_ps(a,b);
|
||||
}
|
||||
// Real double
|
||||
inline __m512d operator()(__m512d a, __m512d b){
|
||||
return _mm512_mul_pd(a,b);
|
||||
}
|
||||
// Integer
|
||||
inline __m512i operator()(__m512i a, __m512i b){
|
||||
return _mm512_mullo_epi32(a,b);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct Conj{
|
||||
// Complex single
|
||||
inline __m512 operator()(__m512 in){
|
||||
return _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // Zero out 0+real 0-imag
|
||||
}
|
||||
// Complex double
|
||||
inline __m512d operator()(__m512d in){
|
||||
return _mm512_mask_sub_pd(in, 0xaa,_mm512_setzero_pd(), in);
|
||||
}
|
||||
// do not define for integer input
|
||||
};
|
||||
|
||||
struct TimesMinusI{
|
||||
//Complex single
|
||||
inline __m512 operator()(__m512 in, __m512 ret){
|
||||
__m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag
|
||||
return _mm512_swizzle_ps(tmp, _MM_SWIZ_REG_CDAB);// OK
|
||||
}
|
||||
//Complex double
|
||||
inline __m512d operator()(__m512d in, __m512d ret){
|
||||
__m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag
|
||||
return _mm512_swizzle_pd(tmp, _MM_SWIZ_REG_CDAB);// OK
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct TimesI{
|
||||
//Complex single
|
||||
inline __m512 operator()(__m512 in, __m512 ret){
|
||||
__m512 tmp = _mm512_swizzle_ps(in, _MM_SWIZ_REG_CDAB);// OK
|
||||
return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp); // real -imag
|
||||
}
|
||||
//Complex double
|
||||
inline __m512d operator()(__m512d in, __m512d ret){
|
||||
__m512d tmp = _mm512_swizzle_pd(in, _MM_SWIZ_REG_CDAB);// OK
|
||||
return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp); // real -imag
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
struct Permute{
|
||||
|
||||
static inline __m512 Permute0(__m512 in){
|
||||
return _mm512_permute4f128_ps(in,(_MM_PERM_ENUM)_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
};
|
||||
static inline __m512 Permute1(__m512 in){
|
||||
return _mm512_permute4f128_ps(in,(_MM_PERM_ENUM)_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
};
|
||||
static inline __m512 Permute2(__m512 in){
|
||||
return _mm512_swizzle_ps(in,_MM_SWIZ_REG_BADC);
|
||||
};
|
||||
static inline __m512 Permute3(__m512 in){
|
||||
return _mm512_swizzle_ps(in,_MM_SWIZ_REG_CDAB);
|
||||
};
|
||||
|
||||
static inline __m512d Permute0(__m512d in){// Hack no intrinsic for 256 swaps of __m512d
|
||||
return (__m512d)_mm512_permute4f128_ps((__m512)in,(_MM_PERM_ENUM)_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
};
|
||||
static inline __m512d Permute1(__m512d in){
|
||||
return _mm512_swizzle_pd(in,_MM_SWIZ_REG_BADC);
|
||||
};
|
||||
static inline __m512d Permute2(__m512d in){
|
||||
return _mm512_swizzle_pd(in,_MM_SWIZ_REG_CDAB);
|
||||
};
|
||||
static inline __m512d Permute3(__m512d in){
|
||||
return in;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Some Template specialization
|
||||
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
||||
return Grid::ComplexF(_mm512_mask_reduce_add_ps(0x5555, in),_mm512_mask_reduce_add_ps(0xAAAA, in));
|
||||
}
|
||||
//Real float Reduce
|
||||
template<>
|
||||
inline Grid::RealF Reduce<Grid::RealF, __m512>::operator()(__m512 in){
|
||||
return _mm512_reduce_add_ps(in);
|
||||
}
|
||||
|
||||
|
||||
//Complex double Reduce
|
||||
template<>
|
||||
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
|
||||
return Grid::ComplexD(_mm512_mask_reduce_add_pd(0x55, in),_mm512_mask_reduce_add_pd(0xAA, in));
|
||||
}
|
||||
|
||||
//Real double Reduce
|
||||
template<>
|
||||
inline Grid::RealD Reduce<Grid::RealD, __m512d>::operator()(__m512d in){
|
||||
return _mm512_reduce_add_pd(in);
|
||||
}
|
||||
|
||||
//Integer Reduce
|
||||
template<>
|
||||
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
|
||||
// FIXME unimplemented
|
||||
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
namespace Grid {
|
||||
typedef __m512 SIMD_Ftype; // Single precision type
|
||||
typedef __m512d SIMD_Dtype; // Double precision type
|
||||
typedef __m512i SIMD_Itype; // Integer type
|
||||
|
||||
// prefecth
|
||||
inline void v_prefetch0(int size, const char *ptr){
|
||||
for(int i=0;i<size;i+=64){ // Define L1 linesize above
|
||||
_mm_prefetch(ptr+i+4096,_MM_HINT_T1);
|
||||
_mm_prefetch(ptr+i+512,_MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
inline void prefetch_HINT_T0(const char *ptr){
|
||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Function name aliases
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
typedef Optimization::Vstore VstoreSIMD;
|
||||
typedef Optimization::Vset VsetSIMD;
|
||||
typedef Optimization::Vstream VstreamSIMD;
|
||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||
|
||||
|
||||
// Arithmetic operations
|
||||
typedef Optimization::Sum SumSIMD;
|
||||
typedef Optimization::Sub SubSIMD;
|
||||
typedef Optimization::Mult MultSIMD;
|
||||
typedef Optimization::MultComplex MultComplexSIMD;
|
||||
typedef Optimization::Conj ConjSIMD;
|
||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||
typedef Optimization::TimesI TimesISIMD;
|
||||
|
||||
}
|
@ -151,10 +151,10 @@ namespace Optimization {
|
||||
// Complex float
|
||||
inline __m128 operator()(__m128 a, __m128 b){
|
||||
__m128 ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
||||
ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
|
||||
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
||||
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
||||
ymm1 = _mm_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi
|
||||
ymm2 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai
|
||||
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
return _mm_addsub_ps(ymm0,ymm1);
|
||||
}
|
||||
@ -201,7 +201,7 @@ namespace Optimization {
|
||||
//Complex single
|
||||
inline __m128 operator()(__m128 in, __m128 ret){
|
||||
__m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i
|
||||
return _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
||||
return _mm_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
}
|
||||
//Complex double
|
||||
inline __m128d operator()(__m128d in, __m128d ret){
|
||||
@ -215,7 +215,7 @@ namespace Optimization {
|
||||
struct TimesI{
|
||||
//Complex single
|
||||
inline __m128 operator()(__m128 in, __m128 ret){
|
||||
__m128 tmp =_mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1));
|
||||
__m128 tmp =_mm_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i
|
||||
}
|
||||
//Complex double
|
||||
@ -225,27 +225,45 @@ namespace Optimization {
|
||||
}
|
||||
};
|
||||
|
||||
struct Permute{
|
||||
|
||||
static inline __m128 Permute0(__m128 in){
|
||||
return _mm_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2));
|
||||
};
|
||||
static inline __m128 Permute1(__m128 in){
|
||||
return _mm_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
|
||||
};
|
||||
static inline __m128 Permute2(__m128 in){
|
||||
return in;
|
||||
};
|
||||
static inline __m128 Permute3(__m128 in){
|
||||
return in;
|
||||
};
|
||||
|
||||
static inline __m128d Permute0(__m128d in){
|
||||
return _mm_shuffle_pd(in,in,0x1);
|
||||
};
|
||||
static inline __m128d Permute1(__m128d in){
|
||||
return in;
|
||||
};
|
||||
static inline __m128d Permute2(__m128d in){
|
||||
return in;
|
||||
};
|
||||
static inline __m128d Permute3(__m128d in){
|
||||
return in;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Some Template specialization
|
||||
template < typename vtype >
|
||||
void permute(vtype &a, vtype b, int perm) {
|
||||
uconv<vtype> conv;
|
||||
conv.v = b;
|
||||
switch(perm){
|
||||
case 3: break; //empty for SSE4
|
||||
case 2: break; //empty for SSE4
|
||||
case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
||||
case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
a=conv.v;
|
||||
};
|
||||
|
||||
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m128>::operator()(__m128 in){
|
||||
__m128 v1; // two complex
|
||||
Optimization::permute(v1,in,0);
|
||||
v1= Optimization::Permute::Permute0(in);
|
||||
v1= _mm_add_ps(v1,in);
|
||||
u128f conv; conv.v=v1;
|
||||
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||
@ -254,9 +272,9 @@ namespace Optimization {
|
||||
template<>
|
||||
inline Grid::RealF Reduce<Grid::RealF, __m128>::operator()(__m128 in){
|
||||
__m128 v1,v2; // quad single
|
||||
Optimization::permute(v1,in,0);
|
||||
v1= Optimization::Permute::Permute0(in);
|
||||
v1= _mm_add_ps(v1,in);
|
||||
Optimization::permute(v2,v1,1);
|
||||
v2= Optimization::Permute::Permute1(v1);
|
||||
v1 = _mm_add_ps(v1,v2);
|
||||
u128f conv; conv.v=v1;
|
||||
return conv.f[0];
|
||||
@ -274,7 +292,7 @@ namespace Optimization {
|
||||
template<>
|
||||
inline Grid::RealD Reduce<Grid::RealD, __m128d>::operator()(__m128d in){
|
||||
__m128d v1;
|
||||
Optimization::permute(v1,in,0); // avx 256; quad double
|
||||
v1 = Optimization::Permute::Permute0(in);
|
||||
v1 = _mm_add_pd(v1,in);
|
||||
u128d conv; conv.v = v1;
|
||||
return conv.f[0];
|
||||
@ -302,14 +320,6 @@ namespace Grid {
|
||||
inline void prefetch_HINT_T0(const char *ptr){
|
||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
|
||||
// Gpermute function
|
||||
template < typename VectorSIMD >
|
||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
||||
Optimization::permute(y.v,b.v,perm);
|
||||
}
|
||||
|
||||
|
||||
// Function name aliases
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
|
@ -19,6 +19,9 @@
|
||||
#if defined AVX512
|
||||
#include "Grid_avx512.h"
|
||||
#endif
|
||||
#if defined IMCI
|
||||
#include "Grid_imci.h"
|
||||
#endif
|
||||
#if defined QPX
|
||||
#include "Grid_qpx.h"
|
||||
#endif
|
||||
@ -248,30 +251,43 @@ namespace Grid {
|
||||
// all subtypes; may not be a good assumption, but could
|
||||
// add the vector width as a template param for BG/Q for example
|
||||
////////////////////////////////////////////////////////////////////
|
||||
friend inline void permute0(Grid_simd &y,Grid_simd b){
|
||||
y.v = Optimization::Permute::Permute0(b.v);
|
||||
}
|
||||
friend inline void permute1(Grid_simd &y,Grid_simd b){
|
||||
y.v = Optimization::Permute::Permute1(b.v);
|
||||
}
|
||||
friend inline void permute2(Grid_simd &y,Grid_simd b){
|
||||
y.v = Optimization::Permute::Permute2(b.v);
|
||||
}
|
||||
friend inline void permute3(Grid_simd &y,Grid_simd b){
|
||||
y.v = Optimization::Permute::Permute3(b.v);
|
||||
}
|
||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
||||
{
|
||||
Gpermute<Grid_simd>(y,b,perm);
|
||||
if (perm==3) permute3(y,b);
|
||||
else if (perm==2) permute2(y,b);
|
||||
else if (perm==1) permute1(y,b);
|
||||
else if (perm==0) permute0(y,b);
|
||||
}
|
||||
|
||||
|
||||
|
||||
};// end of Grid_simd class definition
|
||||
|
||||
|
||||
///////////////////////
|
||||
// Splat
|
||||
///////////////////////
|
||||
|
||||
// this is only for the complex version
|
||||
template <class S, class V, IfComplex<S> =0, class ABtype>
|
||||
inline void vsplat(Grid_simd<S,V> &ret,ABtype a, ABtype b){
|
||||
inline void vsplat(Grid_simd<S,V> &ret,ABtype a, ABtype b){
|
||||
ret.v = binary<V>(a, b, VsplatSIMD());
|
||||
}
|
||||
|
||||
// overload if complex
|
||||
template <class S,class V> inline void vsplat(Grid_simd<S,V> &ret, EnableIf<is_complex < S >, S> c) {
|
||||
Real a = real(c);
|
||||
Real b = imag(c);
|
||||
vsplat(ret,a,b);
|
||||
vsplat(ret,real(c),imag(c));
|
||||
}
|
||||
|
||||
//if real fill with a, if complex fill with a in the real part (first function above)
|
||||
@ -290,8 +306,8 @@ namespace Grid {
|
||||
template <class S,class V, IfComplex<S> = 0 > inline void vcomplex_i(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0,1.0));}
|
||||
|
||||
// if not complex overload here
|
||||
template <class S,class V, IfReal<S> = 0 > inline void vone (Grid_simd<S,V> &ret){ vsplat(ret,1.0); }
|
||||
template <class S,class V, IfReal<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) { vsplat(ret,0.0); }
|
||||
template <class S,class V, IfReal<S> = 0 > inline void vone (Grid_simd<S,V> &ret){ vsplat(ret,S(1.0)); }
|
||||
template <class S,class V, IfReal<S> = 0 > inline void vzero(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0)); }
|
||||
|
||||
// For integral types
|
||||
template <class S,class V,IfInteger<S> = 0 > inline void vone(Grid_simd<S,V> &ret) {vsplat(ret,1); }
|
||||
@ -304,13 +320,18 @@ namespace Grid {
|
||||
///////////////////////
|
||||
// Vstream
|
||||
///////////////////////
|
||||
template <class S,class V, IfNotInteger<S> = 0 >
|
||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||
binary<void>((Real*)&out.v, in.v, VstreamSIMD());
|
||||
}
|
||||
template <class S,class V, IfReal<S> = 0 >
|
||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||
binary<void>((S *)&out.v, in.v, VstreamSIMD());
|
||||
}
|
||||
template <class S,class V, IfComplex<S> = 0 >
|
||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||
typedef typename S::value_type T;
|
||||
binary<void>((T *)&out.v, in.v, VstreamSIMD());
|
||||
}
|
||||
|
||||
template <class S,class V, IfInteger<S> = 0 >
|
||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
||||
out=in;
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@ template<class vtype> class iScalar
|
||||
public:
|
||||
vtype _internal;
|
||||
|
||||
typedef vtype element;
|
||||
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
||||
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
||||
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
||||
@ -124,6 +125,7 @@ template<class vtype,int N> class iVector
|
||||
public:
|
||||
vtype _internal[N];
|
||||
|
||||
typedef vtype element;
|
||||
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
||||
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
||||
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
||||
@ -175,6 +177,7 @@ public:
|
||||
permute(out._internal[i],in._internal[i],permutetype);
|
||||
}
|
||||
}
|
||||
|
||||
// Unary negation
|
||||
friend strong_inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
|
||||
iVector<vtype,N> ret;
|
||||
@ -219,6 +222,7 @@ template<class vtype,int N> class iMatrix
|
||||
public:
|
||||
vtype _internal[N][N];
|
||||
|
||||
typedef vtype element;
|
||||
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
|
||||
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
|
||||
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
|
||||
@ -287,12 +291,15 @@ public:
|
||||
vstream(out._internal[i][j],in._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
|
||||
friend strong_inline void permute(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int permutetype){
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
permute(out._internal[i][j],in._internal[i][j],permutetype);
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
// Unary negation
|
||||
friend strong_inline iMatrix<vtype,N> operator -(const iMatrix<vtype,N> &r) {
|
||||
iMatrix<vtype,N> ret;
|
||||
|
@ -35,19 +35,22 @@ icpc-avx-openmp-mpi)
|
||||
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
||||
;;
|
||||
icpc-avx-openmp)
|
||||
CXX=icpc ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LIBS="-fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
||||
CXX=icpc ../../configure --enable-precision=single --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LIBS="-fopenmp -lgmp -lmpfr" --enable-comms=mpi
|
||||
;;
|
||||
icpc-avx2)
|
||||
CXX=icpc ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
CXX=icpc ../../configure --enable-simd=AVX2 CXXFLAGS="-march=core-avx2 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
;;
|
||||
icpc-avx512)
|
||||
CXX=icpc ../../configure --enable-simd=AVX512 CXXFLAGS="-xCOMMON-AVX512 -O3 -std=c++11" --host=none LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
;;
|
||||
icpc-mic)
|
||||
CXX=icpc ../../configure --host=none --enable-simd=AVX512 CXXFLAGS="-mmic -O3 -std=c++11" LDFLAGS=-mmic LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
CXX=icpc ../../configure --host=none --enable-simd=IMCI CXXFLAGS="-mmic -O3 -std=c++11" LDFLAGS=-mmic LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
;;
|
||||
icpc-mic-avx512)
|
||||
CXX=icpc ../../configure --host=none --enable-simd=IMCI CXXFLAGS="-xCOMMON_AVX512 -O3 -std=c++11" LDFLAGS=-xCOMMON_AVX512 LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
;;
|
||||
clang-sse)
|
||||
CXX=clang++ ../../configure --enable-simd=SSE4 CXXFLAGS="-msse4 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
CXX=clang++ ../../configure --enable-precision=single --enable-simd=SSE4 CXXFLAGS="-msse4 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
;;
|
||||
clang-avx)
|
||||
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
|
||||
|
@ -1,6 +1,35 @@
|
||||
#!/bin/bash
|
||||
|
||||
LOG=$1
|
||||
SWEEPS=$2
|
||||
grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10} END { print S/NR} '
|
||||
grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+exp(-$10)} END { print S/NR} '
|
||||
SWEEPS=`grep dH $LOG | wc -l`
|
||||
SWEEPS=`expr $SWEEPS - 80`
|
||||
echo
|
||||
echo $SWEEPS thermalised sweeps
|
||||
echo
|
||||
plaq=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10} END { print S/NR} ' `
|
||||
plaqe=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10 ; SS=SS+$10*$10 } END { print sqrt( (SS/NR - S*S/NR/NR)/NR) } ' `
|
||||
echo "Plaquette: $plaq (${plaqe})"
|
||||
echo
|
||||
|
||||
dHv=`grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+$10 ; SS=SS+$10*$10 } END { print sqrt(SS/NR) } ' `
|
||||
edH=`grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+exp(-$10)} END { print S/NR} '`
|
||||
echo "<e-dH>: $edH"
|
||||
echo "<rms dH>: $dHv"
|
||||
|
||||
TRAJ=`grep Acc $LOG | wc -l`
|
||||
ACC=`grep Acc $LOG | grep ACCE | wc -l`
|
||||
PACC=`expr 100 \* ${ACC} / ${TRAJ} `
|
||||
echo
|
||||
echo "Acceptance $PACC % $ACC / $TRAJ "
|
||||
|
||||
grep Plaq $LOG | awk '{ print $10 }' | uniq > plaq.dat
|
||||
grep dH $LOG | awk '{ print $10 }' > dH.dat
|
||||
echo set yrange [-0.1:0.6] > plot.gnu
|
||||
echo set terminal 'pdf' >> plot.gnu
|
||||
echo "set output 'plaq.${LOG}.pdf'" >> plot.gnu
|
||||
echo "plot 'plaq.dat' w l, 'dH.dat' w l " >> plot.gnu
|
||||
echo
|
||||
gnuplot plot.gnu >& gnu.errs
|
||||
open plaq.${LOG}.pdf
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@ shift
|
||||
while (( "$#" )); do
|
||||
|
||||
echo $1
|
||||
sed -e "s:${WAS}:${IS}:g" -i .bak $1
|
||||
sed -e "s@${WAS}@${IS}@g" -i .bak $1
|
||||
|
||||
shift
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_gparity Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
|
||||
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
|
||||
|
||||
|
||||
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
|
||||
@ -86,12 +86,16 @@ Test_dwf_hdcr_SOURCES=Test_dwf_hdcr.cc
|
||||
Test_dwf_hdcr_LDADD=-lGrid
|
||||
|
||||
|
||||
#Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
|
||||
#Test_dwf_lanczos_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_gamma_SOURCES=Test_gamma.cc
|
||||
Test_gamma_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_gparity_SOURCES=Test_gparity.cc
|
||||
Test_gparity_LDADD=-lGrid
|
||||
#Test_gparity_SOURCES=Test_gparity.cc
|
||||
#Test_gparity_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
|
||||
@ -174,6 +178,10 @@ Test_rng_fixed_SOURCES=Test_rng_fixed.cc
|
||||
Test_rng_fixed_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_serialisation_SOURCES=Test_serialisation.cc
|
||||
Test_serialisation_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_simd_SOURCES=Test_simd.cc
|
||||
Test_simd_LDADD=-lGrid
|
||||
|
||||
|
@ -50,7 +50,7 @@ int main (int argc, char ** argv)
|
||||
NerscField header;
|
||||
|
||||
std::string file("./ckpoint_lat.4000");
|
||||
readNerscConfiguration(Umu,header,file);
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
|
@ -42,7 +42,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
NerscField header;
|
||||
std::string file("./ckpoint_lat.400");
|
||||
readNerscConfiguration(Umu,header,file);
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
|
||||
// SU3::ColdConfiguration(RNG4,Umu);
|
||||
// SU3::TepidConfiguration(RNG4,Umu);
|
||||
|
@ -388,7 +388,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
NerscField header;
|
||||
std::string file("./ckpoint_lat.4000");
|
||||
readNerscConfiguration(Umu,header,file);
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
|
||||
// SU3::ColdConfiguration(RNG4,Umu);
|
||||
// SU3::TepidConfiguration(RNG4,Umu);
|
||||
|
57
tests/Test_dwf_lanczos.cc
Normal file
57
tests/Test_dwf_lanczos.cc
Normal file
@ -0,0 +1,57 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
const int Ls=8;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeFermion src(FGrid); gaussian(RNG5,src);
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5=1.8;
|
||||
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
|
||||
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
|
||||
const int Nk = 10;
|
||||
const int Np = 1;
|
||||
RealD enorm = 1.0;
|
||||
RealD vthrs = 1;
|
||||
const int Nit= 1000;
|
||||
|
||||
ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,PolyX,
|
||||
Nk,Np,enorm,vthrs,Nit);
|
||||
|
||||
|
||||
std::vector<RealD> eval(Nk);
|
||||
std::vector<LatticeFermion> evec(Nk,FGrid);
|
||||
IRL.calc(eval,evec,
|
||||
src,
|
||||
Nsbt,
|
||||
Nconv);
|
||||
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -17,7 +17,9 @@ int main (int argc, char ** argv)
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
GridSerialRNG sRNG;
|
||||
GridParallelRNG pRNG(UGrid);
|
||||
sRNG.SeedRandomDevice();
|
||||
pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeLorentzColourMatrix U(UGrid);
|
||||
@ -25,7 +27,7 @@ int main (int argc, char ** argv)
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=0.04;
|
||||
Real pv =1.0;
|
||||
@ -38,21 +40,20 @@ int main (int argc, char ** argv)
|
||||
TwoFlavourEvenOddRatioPseudoFermionAction<WilsonImplR> Nf2(NumOp, DenOp,CG,CG);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&Nf2);
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
// Create integrator
|
||||
// typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(UGrid,MDpar, FullSet);
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorType;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorType MDynamics(UGrid,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorType> HMC(HMCpar, MDynamics,sRNG,pRNG);
|
||||
|
||||
// Run it
|
||||
HMC.evolve(U);
|
||||
|
@ -15,40 +15,50 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
WilsonFermionR FermOp(U,Fine,RBFine,mass);
|
||||
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||
ConjugateGradient<LatticeFermion> CGmd(1.0e-6,10000);
|
||||
|
||||
TwoFlavourEvenOddPseudoFermionAction<WilsonImplR> WilsonNf2(FermOp,CG,CG);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1(1);
|
||||
ActionLevel<LatticeGaugeField> Level2(4);
|
||||
Level1.push_back(&WilsonNf2);
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
Level2.push_back(&Waction);
|
||||
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
FullSet.push_back(Level2);
|
||||
|
||||
// Create integrator
|
||||
// typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(12,40,1.0);
|
||||
std::vector<int> rel ={1};
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
//typedef LeapFrog<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
//typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef ForceGradient<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
|
||||
IntegratorParameters MDpar(16);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics,sRNG,pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
@ -15,14 +15,21 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
Real pv =0.0;
|
||||
@ -34,22 +41,22 @@ int main (int argc, char ** argv)
|
||||
TwoFlavourEvenOddRatioPseudoFermionAction<WilsonImplR> WilsonNf2(NumOp, DenOp,CG,CG);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&WilsonNf2);
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
// typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics,sRNG,pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
@ -15,40 +15,49 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
WilsonFermionR FermOp(U,Fine,RBFine,mass);
|
||||
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||
ConjugateGradient<LatticeFermion> CGmd(1.0e-6,10000);
|
||||
|
||||
TwoFlavourPseudoFermionAction<WilsonImplR> WilsonNf2(FermOp,CG,CG);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1(1);
|
||||
ActionLevel<LatticeGaugeField> Level2(4);
|
||||
Level1.push_back(&WilsonNf2);
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
// Level1.push_back(&Waction);
|
||||
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
// FullSet.push_back(Level2);
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
// typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics,sRNG,pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
@ -22,30 +22,37 @@ int main (int argc, char ** argv)
|
||||
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeGaugeField U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeGaugeField, LatticeColourMatrix> Waction(6.0);
|
||||
WilsonGaugeActionR Waction(6.0);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to modify the algorithm
|
||||
IntegratorParameters MDpar(12,5,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to modify the algorithm
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
@ -15,14 +15,21 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
Real pv =0.0;
|
||||
@ -34,22 +41,22 @@ int main (int argc, char ** argv)
|
||||
TwoFlavourRatioPseudoFermionAction<WilsonImplR> WilsonNf2(NumOp,FermOp,CG,CG);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&WilsonNf2);
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
// typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
@ -17,7 +17,8 @@ public:
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
random(pRNG,sqrtscale);
|
||||
sqrtscale = real(sqrtscale)*3.0+0.5;// force real pos def
|
||||
sqrtscale = 0.5*(sqrtscale + conjugate(sqrtscale));
|
||||
sqrtscale = sqrtscale*3.0+0.5;// force real pos def
|
||||
scale = sqrtscale *sqrtscale; //scale should be bounded by 12.25
|
||||
|
||||
//
|
||||
|
@ -28,7 +28,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
NerscField header;
|
||||
std::string file("./ckpoint_lat.4000");
|
||||
readNerscConfiguration(Umu,header,file);
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
@ -89,6 +89,13 @@ int main (int argc, char ** argv)
|
||||
TComplex TcP = sum(cPlaq);
|
||||
Complex ll= TensorRemove(TcP);
|
||||
std::cout<<GridLogMessage << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;
|
||||
|
||||
std::string clone2x3("./ckpoint_clone2x3.4000");
|
||||
std::string clone3x3("./ckpoint_clone3x3.4000");
|
||||
int precision32 = 0;
|
||||
|
||||
NerscIO::writeConfiguration(Umu,clone3x3,0,precision32);
|
||||
NerscIO::writeConfiguration(Umu,clone2x3,1,precision32);
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -73,8 +73,10 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
PokeIndex<LorentzIndex>(Umu,link,mu);
|
||||
//reunitarise link;
|
||||
|
||||
//reunitarise link;
|
||||
ProjectOnGroup(Umu);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -15,14 +15,22 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
WilsonFermionR FermOp(U,Fine,RBFine,mass);
|
||||
@ -33,23 +41,23 @@ int main (int argc, char ** argv)
|
||||
OneFlavourEvenOddRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(FermOp,Params);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&WilsonNf1a);
|
||||
Level1.push_back(&WilsonNf1b);
|
||||
Level1.push_back(&Waction);
|
||||
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
|
||||
HMC.evolve(U);
|
||||
|
||||
}
|
||||
|
@ -15,14 +15,22 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
Real pv =0.0;
|
||||
@ -35,23 +43,23 @@ int main (int argc, char ** argv)
|
||||
OneFlavourEvenOddRatioRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(NumOp,DenOp,Params);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&WilsonNf1a);
|
||||
Level1.push_back(&WilsonNf1b);
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
// typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
@ -15,14 +15,22 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
WilsonFermionR FermOp(U,Fine,RBFine,mass);
|
||||
@ -33,23 +41,23 @@ int main (int argc, char ** argv)
|
||||
OneFlavourRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(FermOp,Params);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&WilsonNf1a);
|
||||
Level1.push_back(&WilsonNf1b);
|
||||
Level1.push_back(&Waction);
|
||||
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
@ -15,14 +15,22 @@ int main (int argc, char ** argv)
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
||||
std::vector<int> seeds({6,7,8,80});
|
||||
GridParallelRNG pRNG(&Fine);
|
||||
pRNG.SeedRandomDevice();
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
std::vector<int> seedsS({1,2,3,4});
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedFixedIntegers(seedsS);
|
||||
|
||||
LatticeLorentzColourMatrix U(&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
|
||||
// simplify template declaration? Strip the lorentz from the second template
|
||||
WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
|
||||
WilsonGaugeActionR Waction(5.6);
|
||||
|
||||
Real mass=-0.77;
|
||||
Real pv =0.0;
|
||||
@ -35,23 +43,23 @@ int main (int argc, char ** argv)
|
||||
OneFlavourRatioRationalPseudoFermionAction<WilsonImplR> WilsonNf1b(NumOp,DenOp,Params);
|
||||
|
||||
//Collect actions
|
||||
ActionLevel Level1;
|
||||
ActionLevel<LatticeGaugeField> Level1;
|
||||
Level1.push_back(&WilsonNf1a);
|
||||
Level1.push_back(&WilsonNf1b);
|
||||
Level1.push_back(&Waction);
|
||||
ActionSet FullSet;
|
||||
ActionSet<LatticeGaugeField> FullSet;
|
||||
FullSet.push_back(Level1);
|
||||
|
||||
|
||||
// Create integrator
|
||||
typedef MinimumNorm2 IntegratorAlgorithm;// change here to change the algorithm
|
||||
typedef MinimumNorm2<LatticeGaugeField> IntegratorAlgorithm;// change here to change the algorithm
|
||||
// typedef LeapFrog IntegratorAlgorithm;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(12,20,1.0);
|
||||
Integrator<IntegratorAlgorithm> MDynamics(&Fine,MDpar, FullSet);
|
||||
IntegratorParameters MDpar(20);
|
||||
IntegratorAlgorithm MDynamics(&Fine,MDpar, FullSet);
|
||||
|
||||
// Create HMC
|
||||
HMCparameters HMCpar;
|
||||
HybridMonteCarlo<IntegratorAlgorithm> HMC(HMCpar, MDynamics);
|
||||
HybridMonteCarlo<LatticeGaugeField,IntegratorAlgorithm> HMC(HMCpar, MDynamics, sRNG, pRNG);
|
||||
|
||||
HMC.evolve(U);
|
||||
|
||||
|
93
tests/Test_serialisation.cc
Normal file
93
tests/Test_serialisation.cc
Normal file
@ -0,0 +1,93 @@
|
||||
#include <Grid.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class myclass {
|
||||
public:
|
||||
|
||||
GRID_DECL_CLASS_MEMBERS(myclass,
|
||||
int, x,
|
||||
double, y,
|
||||
bool , b,
|
||||
std::string, name,
|
||||
std::vector<double>, array,
|
||||
std::vector<std::vector<double> >, twodimarray,
|
||||
);
|
||||
|
||||
myclass(){}
|
||||
myclass(int i) : array(4,5.1), twodimarray(3,std::vector<double>(2,1.23456)) {
|
||||
x=i;
|
||||
y=2*i;
|
||||
b=true;
|
||||
name="bother said pooh";
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
int16_t i16 = 1;
|
||||
uint16_t u16 = 2;
|
||||
int32_t i32 = 3;
|
||||
uint32_t u32 = 4;
|
||||
int64_t i64 = 5;
|
||||
uint64_t u64 = 6;
|
||||
float f = M_PI;
|
||||
double d = 2*M_PI;
|
||||
bool b = false;
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
int main(int argc,char **argv)
|
||||
{
|
||||
{
|
||||
XMLWriter WR("bother.xml");
|
||||
|
||||
push(WR,"BasicTypes");
|
||||
write(WR,std::string("i16"),i16);
|
||||
write(WR,"u16",u16);
|
||||
write(WR,"i32",i32);
|
||||
write(WR,"u32",u32);
|
||||
write(WR,"i64",i64);
|
||||
write(WR,"u64",u64);
|
||||
write(WR,"f",f);
|
||||
write(WR,"d",d);
|
||||
write(WR,"b",b);
|
||||
pop(WR);
|
||||
|
||||
myclass obj(1234); // non-trivial constructor
|
||||
write(WR,"obj",obj);
|
||||
|
||||
};
|
||||
|
||||
XMLReader RD("bother.xml");
|
||||
|
||||
myclass copy1;
|
||||
myclass copy2;
|
||||
myclass copy3;
|
||||
|
||||
read(RD,"obj",copy1);
|
||||
std::cout << "Loaded " << copy1<<std::endl;
|
||||
|
||||
{
|
||||
BinaryWriter BWR("bother.bin");
|
||||
write(BWR,"discard",copy1 );
|
||||
}
|
||||
{
|
||||
BinaryReader BRD("bother.bin");
|
||||
read (BRD,"discard",copy2 );
|
||||
std::cout<<copy2<<std::endl;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
TextWriter TWR("bother.txt");
|
||||
write(TWR,"discard",copy1 );
|
||||
}
|
||||
{
|
||||
TextReader TRD("bother.txt");
|
||||
read (TRD,"discard",copy3 );
|
||||
std::cout<<copy3<<std::endl;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user