1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 17:25:37 +01:00

Merge branch 'develop' into feature/hadrons

This commit is contained in:
Antonin Portelli 2018-02-26 15:05:05 +00:00
commit e7e4cee4f3
27 changed files with 474 additions and 132 deletions

View File

@ -187,10 +187,11 @@ Alternatively, some CPU codenames can be directly used:
| `<code>` | Description | | `<code>` | Description |
| ----------- | -------------------------------------- | | ----------- | -------------------------------------- |
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
| `SKL` | [Intel Skylake with AVX512 extensions](https://ark.intel.com/products/codename/37572/Skylake#@server) |
| `BGQ` | Blue Gene/Q | | `BGQ` | Blue Gene/Q |
#### Notes: #### Notes:
- We currently support AVX512 only for the Intel compiler. Support for GCC and clang will appear in future versions of Grid when the AVX512 support within GCC and clang will be more advanced. - We currently support AVX512 for the Intel compiler and GCC (KNL and SKL target). Support for clang will appear in future versions of Grid when the AVX512 support in the compiler will be more advanced.
- For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform. - For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform.
- BG/Q performances are currently rather poor. This is being investigated for future versions. - BG/Q performances are currently rather poor. This is being investigated for future versions.
- The vector size for the `GEN` target can be specified with the `configure` script option `--enable-gen-simd-width`. - The vector size for the `GEN` target can be specified with the `configure` script option `--enable-gen-simd-width`.

View File

@ -48,7 +48,6 @@ int main (int argc, char ** argv)
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt4 = GridDefaultLatt(); std::vector<int> latt4 = GridDefaultLatt();
int Ls=16; int Ls=16;
@ -57,6 +56,10 @@ int main (int argc, char ** argv)
std::stringstream ss(argv[i+1]); ss >> Ls; std::stringstream ss(argv[i+1]); ss >> Ls;
} }
GridLogLayout();
long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
@ -187,7 +190,7 @@ int main (int argc, char ** argv)
FGrid->Barrier(); FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall; double flops=single_site_flops*volume*ncall;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; // std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
@ -226,7 +229,7 @@ int main (int argc, char ** argv)
FGrid->Barrier(); FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall; double flops=single_site_flops*volume*ncall;
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
@ -277,7 +280,7 @@ int main (int argc, char ** argv)
double t1=usecond(); double t1=usecond();
FGrid->Barrier(); FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall; double flops=single_site_flops*volume*ncall;
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
@ -355,7 +358,7 @@ int main (int argc, char ** argv)
// sDw.stat.print(); // sDw.stat.print();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double flops=(single_site_flops*volume*ncall)/2.0;
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
@ -478,7 +481,7 @@ int main (int argc, char ** argv)
FGrid->Barrier(); FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double flops=(single_site_flops*volume*ncall)/2.0;
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;

View File

@ -51,6 +51,7 @@ int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
@ -107,6 +108,7 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc);
std::vector<int> seeds4({1,2,3,4}); std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8}); std::vector<int> seeds5({5,6,7,8});
@ -196,7 +198,7 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
if ( ! report ) { if ( ! report ) {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall; double flops=single_site_flops*volume*ncall;
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t"; std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
} }
@ -228,7 +230,7 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
if(!report){ if(!report){
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double flops=(single_site_flops*volume*ncall)/2.0;
std::cout<< flops/(t1-t0); std::cout<< flops/(t1-t0);
} }
} }
@ -237,6 +239,7 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
#define CHECK_SDW #define CHECK_SDW
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report ) void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
{ {
long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
@ -321,7 +324,7 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
Counter.Report(); Counter.Report();
} else { } else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall; double flops=single_site_flops*volume*ncall;
std::cout<<"\t"<< flops/(t1-t0); std::cout<<"\t"<< flops/(t1-t0);
} }
@ -358,7 +361,7 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
CounterSdw.Report(); CounterSdw.Report();
} else { } else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double flops=(single_site_flops*volume*ncall)/2.0;
std::cout<<"\t"<< flops/(t1-t0); std::cout<<"\t"<< flops/(t1-t0);
} }
} }

View File

@ -107,7 +107,7 @@ int main (int argc, char ** argv)
FGrid->Barrier(); FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=2*1344*volume*ncall; double flops=2*1320*volume*ncall;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; // std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
@ -134,7 +134,7 @@ int main (int argc, char ** argv)
FGrid->Barrier(); FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=2*1344*volume*ncall; double flops=2*1320*volume*ncall;
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
@ -174,7 +174,7 @@ int main (int argc, char ** argv)
FGrid_d->Barrier(); FGrid_d->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=2*1344*volume*ncall; double flops=2*1320*volume*ncall;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; // std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;

View File

@ -4,7 +4,7 @@
Source file: ./benchmarks/Benchmark_wilson.cc Source file: ./benchmarks/Benchmark_wilson.cc
Copyright (C) 2015 Copyright (C) 2018
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
@ -32,6 +32,9 @@ using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
#include "Grid/util/Profiling.h"
template<class d> template<class d>
struct scal { struct scal {
d internal; d internal;
@ -45,6 +48,7 @@ struct scal {
}; };
bool overlapComms = false; bool overlapComms = false;
bool perfProfiling = false;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
@ -53,6 +57,12 @@ int main (int argc, char ** argv)
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
overlapComms = true; overlapComms = true;
} }
if( GridCmdOptionExists(argv,argv+argc,"--perf") ){
perfProfiling = true;
}
long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc);
std::vector<int> latt_size = GridDefaultLatt(); std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
@ -61,10 +71,15 @@ int main (int argc, char ** argv)
GridRedBlackCartesian RBGrid(&Grid); GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
GridLogLayout();
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl; std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl; std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl; std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::cout<<GridLogMessage << "Grid number of colours : "<< QCD::Nc <<std::endl;
std::cout<<GridLogMessage << "Benchmarking Wilson operator in the fundamental representation" << std::endl;
std::vector<int> seeds({1,2,3,4}); std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); GridParallelRNG pRNG(&Grid);
@ -134,9 +149,25 @@ int main (int argc, char ** argv)
Dw.Dhop(src,result,0); Dw.Dhop(src,result,0);
} }
double t1=usecond(); double t1=usecond();
double flops=1344*volume*ncall; double flops=single_site_flops*volume*ncall;
if (perfProfiling){
std::cout<<GridLogMessage << "Profiling Dw with perf"<<std::endl;
System::profile("kernel", [&]() {
for(int i=0;i<ncall;i++){
Dw.Dhop(src,result,0);
}
});
std::cout<<GridLogMessage << "Generated kernel.data"<<std::endl;
std::cout<<GridLogMessage << "Use with: perf report -i kernel.data"<<std::endl;
}
std::cout<<GridLogMessage << "Called Dw"<<std::endl; std::cout<<GridLogMessage << "Called Dw"<<std::endl;
std::cout<<GridLogMessage << "flops per site " << single_site_flops << std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl; std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;

View File

@ -62,6 +62,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Number of colours "<< QCD::Nc <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking WilsonFermionR::Dhop "<<std::endl; std::cout << GridLogMessage<< "* Benchmarking WilsonFermionR::Dhop "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
@ -69,13 +70,15 @@ int main (int argc, char ** argv)
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage << "* OpenMP threads : "<< GridThread::GetThreads() <<std::endl;
std::cout << GridLogMessage << "* MPI tasks : "<< GridCmdVectorIntToString(mpi_layout) << std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout<<GridLogMessage << "============================================================================="<< std::endl; std::cout<<GridLogMessage << "================================================================================================="<< std::endl;
std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl; std::cout<<GridLogMessage << "= Benchmarking Wilson operator in the fundamental representation" << std::endl;
std::cout<<GridLogMessage << "============================================================================="<< std::endl; std::cout<<GridLogMessage << "================================================================================================="<< std::endl;
std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl; std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs\tWilsonEO/MFLOPs\tWilsonDagEO/MFLOPs" << std::endl;
std::cout<<GridLogMessage << "============================================================================="<< std::endl; std::cout<<GridLogMessage << "================================================================================================="<< std::endl;
int Lmax = 32; int Lmax = 32;
int dmin = 0; int dmin = 0;
@ -98,12 +101,19 @@ int main (int argc, char ** argv)
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu); LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeFermion src(&Grid); random(pRNG,src); LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion src_o(&RBGrid); pickCheckerboard(Odd,src_o,src);
LatticeFermion result(&Grid); result=zero; LatticeFermion result(&Grid); result=zero;
LatticeFermion result_e(&RBGrid); result_e=zero;
double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>()); double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params); WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
// Full operator
bench_wilson(src,result,Dw,volume,DaggerNo);
bench_wilson(src,result,Dw,volume,DaggerYes);
std::cout << "\t";
// EO
bench_wilson(src,result,Dw,volume,DaggerNo); bench_wilson(src,result,Dw,volume,DaggerNo);
bench_wilson(src,result,Dw,volume,DaggerYes); bench_wilson(src,result,Dw,volume,DaggerYes);
std::cout << std::endl; std::cout << std::endl;
@ -122,9 +132,26 @@ void bench_wilson (
int const dag ) int const dag )
{ {
int ncall = 1000; int ncall = 1000;
long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc);
double t0 = usecond(); double t0 = usecond();
for(int i=0; i<ncall; i++) { Dw.Dhop(src,result,dag); } for(int i=0; i<ncall; i++) { Dw.Dhop(src,result,dag); }
double t1 = usecond(); double t1 = usecond();
double flops = 1344 * volume * ncall; double flops = single_site_flops * volume * ncall;
std::cout << flops/(t1-t0) << "\t\t";
}
void bench_wilson_eo (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag )
{
int ncall = 1000;
long unsigned int single_site_flops = 8*QCD::Nc*(7+16*QCD::Nc);
double t0 = usecond();
for(int i=0; i<ncall; i++) { Dw.DhopEO(src,result,dag); }
double t1 = usecond();
double flops = (single_site_flops * volume * ncall)/2.0;
std::cout << flops/(t1-t0) << "\t\t"; std::cout << flops/(t1-t0) << "\t\t";
} }

View File

@ -249,6 +249,9 @@ case ${ax_cv_cxx_compiler_vendor} in
AVX512) AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
SKL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for SkyLake Xeon])
SIMD_FLAGS='-march=skylake-avx512';;
KNC) KNC)
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner]) AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
SIMD_FLAGS='';; SIMD_FLAGS='';;

View File

@ -57,7 +57,7 @@ std::vector<std::string> TFundtoHirep<Rep>::getOutput(void)
template <typename Rep> template <typename Rep>
void TFundtoHirep<Rep>::setup(void) void TFundtoHirep<Rep>::setup(void)
{ {
env().template registerLattice<typename Rep::LatticeField>(getName()); envCreateLat(typename Rep::LatticeField, getName());
} }
// execution /////////////////////////////////////////////////////////////////// // execution ///////////////////////////////////////////////////////////////////
@ -70,6 +70,6 @@ void TFundtoHirep<Rep>::execute(void)
Rep TargetRepresentation(U._grid); Rep TargetRepresentation(U._grid);
TargetRepresentation.update_representation(U); TargetRepresentation.update_representation(U);
typename Rep::LatticeField &URep = *env().template createLattice<typename Rep::LatticeField>(getName()); auto &URep = envGet(typename Rep::LatticeField, getName());
URep = TargetRepresentation.U; URep = TargetRepresentation.U;
} }

View File

@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/MultiShiftFunction.h> #include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h> #include <Grid/algorithms/approx/Forecast.h>
#include <Grid/algorithms/iterative/Deflation.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h> #include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h> #include <Grid/algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h> #include <Grid/algorithms/iterative/NormalEquations.h>

View File

@ -0,0 +1,101 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_DEFLATION_H
#define GRID_DEFLATION_H
namespace Grid {
struct ZeroGuesser {
public:
template<class Field>
void operator()(const Field &src,Field &guess) { guess = Zero(); };
};
struct SourceGuesser {
public:
template<class Field>
void operator()(const Field &src,Field &guess) { guess = src; };
};
////////////////////////////////
// Fine grid deflation
////////////////////////////////
template<class Field>
struct DeflatedGuesser {
private:
const std::vector<Field> &evec;
const std::vector<RealD> &eval;
public:
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
void operator()(const Field &src,Field &guess) {
guess = zero;
assert(evec.size()==eval.size());
auto N = evec.size();
for (int i=0;i<N;i++) {
Field& tmp = evec[i];
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
}
}
};
template<class FineField, class CoarseField>
class LocalCoherenceDeflatedGuesser {
private:
const std::vector<FineField> &subspace;
const std::vector<CoarseField> &evec_coarse;
const std::vector<RealD> &eval_coarse;
public:
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
const std::vector<CoarseField> &_evec_coarse,
const std::vector<RealD> &_eval_coarse)
: subspace(_subspace),
evec_coarse(_evec_coarse),
eval_coarse(_eval_coarse)
{
}
void operator()(const FineField &src,FineField &guess) {
int N = (int)evec_coarse.size();
CoarseField src_coarse(evec_coarse[0]._grid);
CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
blockProject(src,src_coarse,subspace);
for (int i=0;i<N;i++) {
CoarseField & tmp = evec_coarse[i];
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
}
blockPromote(guess_coarse,guess,subspace);
};
};
}
#endif

View File

@ -149,19 +149,6 @@ void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, boo
basisReorderInPlace(_v,sort_vals,idx); basisReorderInPlace(_v,sort_vals,idx);
} }
// PAB: faster to compute the inner products first then fuse loops.
// If performance critical can improve.
template<class Field>
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = zero;
assert(_v.size()==eval.size());
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
// Implicitly restarted lanczos // Implicitly restarted lanczos
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
@ -181,6 +168,7 @@ enum IRLdiagonalisation {
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field> template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
{ {
public: public:
LinearFunction<Field> &_HermOp; LinearFunction<Field> &_HermOp;
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { }; ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
@ -243,6 +231,7 @@ class ImplicitlyRestartedLanczos {
///////////////////////// /////////////////////////
public: public:
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// PAB: // PAB:
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////

View File

@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */ /* END LEGAL */
#ifndef GRID_LOCAL_COHERENCE_IRL_H #ifndef GRID_LOCAL_COHERENCE_IRL_H
#define GRID_LOCAL_COHERENCE_IRL_H #define GRID_LOCAL_COHERENCE_IRL_H
namespace Grid { namespace Grid {
struct LanczosParams : Serializable { struct LanczosParams : Serializable {
public: public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
@ -70,21 +73,24 @@ public:
typedef Lattice<Fobj> FineField; typedef Lattice<Fobj> FineField;
LinearOperatorBase<FineField> &_Linop; LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate; std::vector<FineField> &subspace;
ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) : ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
_Linop(linop), _Linop(linop), subspace(_subspace)
_Aggregate(aggregate) { }; {
assert(subspace.size() >0);
};
void operator()(const CoarseField& in, CoarseField& out) { void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = subspace[0]._grid;
int checkerboard = subspace[0].checkerboard;
GridBase *FineGrid = _Aggregate.FineGrid; FineField fin (FineGrid); fin.checkerboard= checkerboard;
FineField fin(FineGrid); FineField fout(FineGrid); fout.checkerboard = checkerboard;
FineField fout(FineGrid);
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl; blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl; _Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl; blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
} }
}; };
@ -99,24 +105,27 @@ public:
OperatorFunction<FineField> & _poly; OperatorFunction<FineField> & _poly;
LinearOperatorBase<FineField> &_Linop; LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate; std::vector<FineField> &subspace;
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop, ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
Aggregation<Fobj,CComplex,nbasis> &aggregate) : LinearOperatorBase<FineField>& linop,
std::vector<FineField> & _subspace) :
_poly(poly), _poly(poly),
_Linop(linop), _Linop(linop),
_Aggregate(aggregate) { }; subspace(_subspace)
{ };
void operator()(const CoarseField& in, CoarseField& out) { void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = _Aggregate.FineGrid; GridBase *FineGrid = subspace[0]._grid;
int checkerboard = subspace[0].checkerboard;
FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; FineField fin (FineGrid); fin.checkerboard =checkerboard;
FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; FineField fout(FineGrid);fout.checkerboard =checkerboard;
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl; blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl; _poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl; blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
} }
}; };
@ -132,19 +141,23 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
LinearFunction<CoarseField> & _Poly; LinearFunction<CoarseField> & _Poly;
OperatorFunction<FineField> & _smoother; OperatorFunction<FineField> & _smoother;
LinearOperatorBase<FineField> &_Linop; LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
RealD _coarse_relax_tol; RealD _coarse_relax_tol;
std::vector<FineField> &_subspace;
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly, ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
OperatorFunction<FineField> &smoother, OperatorFunction<FineField> &smoother,
LinearOperatorBase<FineField> &Linop, LinearOperatorBase<FineField> &Linop,
Aggregation<Fobj,CComplex,nbasis> &Aggregate, std::vector<FineField> &subspace,
RealD coarse_relax_tol=5.0e3) RealD coarse_relax_tol=5.0e3)
: _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
_coarse_relax_tol(coarse_relax_tol)
{ };
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{ {
CoarseField v(B); CoarseField v(B);
RealD eval_poly = eval; RealD eval_poly = eval;
// Apply operator // Apply operator
_Poly(B,v); _Poly(B,v);
@ -168,14 +181,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc
} }
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{ {
GridBase *FineGrid = _Aggregate.FineGrid; GridBase *FineGrid = _subspace[0]._grid;
int checkerboard = _subspace[0].checkerboard;
int checkerboard = _Aggregate.checkerboard;
FineField fB(FineGrid);fB.checkerboard =checkerboard; FineField fB(FineGrid);fB.checkerboard =checkerboard;
FineField fv(FineGrid);fv.checkerboard =checkerboard; FineField fv(FineGrid);fv.checkerboard =checkerboard;
_Aggregate.PromoteFromSubspace(B,fv); blockPromote(B,fv,_subspace);
_smoother(_Linop,fv,fB); _smoother(_Linop,fv,fB);
RealD eval_poly = eval; RealD eval_poly = eval;
@ -217,27 +229,65 @@ protected:
int _checkerboard; int _checkerboard;
LinearOperatorBase<FineField> & _FineOp; LinearOperatorBase<FineField> & _FineOp;
// FIXME replace Aggregation with vector of fine; the code reuse is too small for std::vector<RealD> &evals_fine;
// the hassle and complexity of cross coupling. std::vector<RealD> &evals_coarse;
Aggregation<Fobj,CComplex,nbasis> _Aggregate; std::vector<FineField> &subspace;
std::vector<RealD> evals_fine; std::vector<CoarseField> &evec_coarse;
std::vector<RealD> evals_coarse;
std::vector<CoarseField> evec_coarse; private:
std::vector<RealD> _evals_fine;
std::vector<RealD> _evals_coarse;
std::vector<FineField> _subspace;
std::vector<CoarseField> _evec_coarse;
public: public:
LocalCoherenceLanczos(GridBase *FineGrid, LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid, GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp, LinearOperatorBase<FineField> &FineOp,
int checkerboard) : int checkerboard) :
_CoarseGrid(CoarseGrid), _CoarseGrid(CoarseGrid),
_FineGrid(FineGrid), _FineGrid(FineGrid),
_Aggregate(CoarseGrid,FineGrid,checkerboard),
_FineOp(FineOp), _FineOp(FineOp),
_checkerboard(checkerboard) _checkerboard(checkerboard),
evals_fine (_evals_fine),
evals_coarse(_evals_coarse),
subspace (_subspace),
evec_coarse(_evec_coarse)
{ {
evals_fine.resize(0); evals_fine.resize(0);
evals_coarse.resize(0); evals_coarse.resize(0);
}; };
void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } //////////////////////////////////////////////////////////////////////////
// Alternate constructore, external storage for use by Hadrons module
//////////////////////////////////////////////////////////////////////////
LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard,
std::vector<FineField> &ext_subspace,
std::vector<CoarseField> &ext_coarse,
std::vector<RealD> &ext_eval_fine,
std::vector<RealD> &ext_eval_coarse
) :
_CoarseGrid(CoarseGrid),
_FineGrid(FineGrid),
_FineOp(FineOp),
_checkerboard(checkerboard),
evals_fine (ext_eval_fine),
evals_coarse(ext_eval_coarse),
subspace (ext_subspace),
evec_coarse (ext_coarse)
{
evals_fine.resize(0);
evals_coarse.resize(0);
};
void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid);
blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
};
template<typename T> static RealD normalise(T& v) template<typename T> static RealD normalise(T& v)
{ {
@ -246,43 +296,44 @@ public:
v = v * (1.0/nn); v = v * (1.0/nn);
return nn; return nn;
} }
/*
void fakeFine(void) void fakeFine(void)
{ {
int Nk = nbasis; int Nk = nbasis;
_Aggregate.subspace.resize(Nk,_FineGrid); subspace.resize(Nk,_FineGrid);
_Aggregate.subspace[0]=1.0; subspace[0]=1.0;
_Aggregate.subspace[0].checkerboard=_checkerboard; subspace[0].checkerboard=_checkerboard;
normalise(_Aggregate.subspace[0]); normalise(subspace[0]);
PlainHermOp<FineField> Op(_FineOp); PlainHermOp<FineField> Op(_FineOp);
for(int k=1;k<Nk;k++){ for(int k=1;k<Nk;k++){
_Aggregate.subspace[k].checkerboard=_checkerboard; subspace[k].checkerboard=_checkerboard;
Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]); Op(subspace[k-1],subspace[k]);
normalise(_Aggregate.subspace[k]); normalise(subspace[k]);
} }
} }
*/
void testFine(RealD resid) void testFine(RealD resid)
{ {
assert(evals_fine.size() == nbasis); assert(evals_fine.size() == nbasis);
assert(_Aggregate.subspace.size() == nbasis); assert(subspace.size() == nbasis);
PlainHermOp<FineField> Op(_FineOp); PlainHermOp<FineField> Op(_FineOp);
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op); ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
for(int k=0;k<nbasis;k++){ for(int k=0;k<nbasis;k++){
assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1); assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
} }
} }
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax) void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
{ {
assert(evals_fine.size() == nbasis); assert(evals_fine.size() == nbasis);
assert(_Aggregate.subspace.size() == nbasis); assert(subspace.size() == nbasis);
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth); Chebyshev<FineField> ChebySmooth(cheby_smooth);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate); ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_subspace);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
for(int k=0;k<evec_coarse.size();k++){ for(int k=0;k<evec_coarse.size();k++){
if ( k < nbasis ) { if ( k < nbasis ) {
@ -302,34 +353,34 @@ public:
PlainHermOp<FineField> Op(_FineOp); PlainHermOp<FineField> Op(_FineOp);
evals_fine.resize(Nm); evals_fine.resize(Nm);
_Aggregate.subspace.resize(Nm,_FineGrid); subspace.resize(Nm,_FineGrid);
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
int Nconv; int Nconv;
IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); IRL.calc(evals_fine,subspace,src,Nconv,false);
// Shrink down to number saved // Shrink down to number saved
assert(Nstop>=nbasis); assert(Nstop>=nbasis);
assert(Nconv>=nbasis); assert(Nconv>=nbasis);
evals_fine.resize(nbasis); evals_fine.resize(nbasis);
_Aggregate.subspace.resize(nbasis,_FineGrid); subspace.resize(nbasis,_FineGrid);
} }
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
int Nstop, int Nk, int Nm,RealD resid, int Nstop, int Nk, int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes) RealD MaxIt, RealD betastp, int MinRes)
{ {
Chebyshev<FineField> Cheby(cheby_op); Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate); ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_subspace);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate); ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_subspace);
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth); Chebyshev<FineField> ChebySmooth(cheby_smooth);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_subspace,relax);
evals_coarse.resize(Nm); evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid); evec_coarse.resize(Nm,_CoarseGrid);

View File

@ -108,6 +108,11 @@ namespace Grid {
template<class Matrix> template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){ void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function // FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp // FIXME use CBfactorise to control schur decomp
@ -129,7 +134,6 @@ namespace Grid {
pickCheckerboard(Odd ,src_o,in); pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out); pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out); pickCheckerboard(Odd ,sol_o,out);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl; std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
@ -146,6 +150,7 @@ namespace Grid {
// Call the red-black solver // Call the red-black solver
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl; std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
guess(src_o,sol_o);
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd); _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl; std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
@ -190,6 +195,11 @@ namespace Grid {
}; };
template<class Matrix> template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){ void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function // FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp // FIXME use CBfactorise to control schur decomp
@ -225,6 +235,7 @@ namespace Grid {
// Call the red-black solver // Call the red-black solver
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
guess(src_o,sol_o);
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd); _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
@ -269,6 +280,11 @@ namespace Grid {
template<class Matrix> template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){ void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix,class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function // FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp // FIXME use CBfactorise to control schur decomp
@ -305,6 +321,7 @@ namespace Grid {
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd); // _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
guess(src_o,tmp);
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd); _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd); _Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
@ -348,6 +365,11 @@ namespace Grid {
template<class Matrix> template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){ void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function // FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp // FIXME use CBfactorise to control schur decomp
@ -385,6 +407,7 @@ namespace Grid {
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd); // _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
// _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd); // _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
guess(src_o,tmp);
_HermitianRBSolver(src_o,tmp); assert(tmp.checkerboard==Odd); _HermitianRBSolver(src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd); _Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);

View File

@ -44,11 +44,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
MPI_Initialized(&flag); // needed to coexist with other libs apparently MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) { if ( !flag ) {
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
assert (provided == MPI_THREAD_MULTIPLE); //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
assert(0);
} }
Grid_quiesce_nodes(); Grid_quiesce_nodes();
// Never clean up as done once.
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
GlobalSharedMemory::Init(communicator_world); GlobalSharedMemory::Init(communicator_world);
@ -85,9 +89,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{ {
MPI_Comm optimal_comm; MPI_Comm optimal_comm;
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine ////////////////////////////////////////////////////
// Remap using the shared memory optimising routine
// The remap creates a comm which must be freed
////////////////////////////////////////////////////
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
InitFromMPICommunicator(processors,optimal_comm); InitFromMPICommunicator(processors,optimal_comm);
SetCommunicator(optimal_comm); SetCommunicator(optimal_comm);
///////////////////////////////////////////////////
// Free the temp communicator
///////////////////////////////////////////////////
MPI_Comm_free(&optimal_comm);
} }
////////////////////////////////// //////////////////////////////////
@ -183,8 +195,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
} else { } else {
srank = 0; srank = 0;
comm_split = parent.communicator; int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
// std::cout << " Inherited communicator " <<comm_split <<std::endl; assert(ierr==0);
} }
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
@ -197,6 +209,11 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
SetCommunicator(comm_split); SetCommunicator(comm_split);
///////////////////////////////////////////////
// Free the temp communicator
///////////////////////////////////////////////
MPI_Comm_free(&comm_split);
if(0){ if(0){
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl; std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
for(int d=0;d<processors.size();d++){ for(int d=0;d<processors.size();d++){
@ -210,6 +227,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base) void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{ {
////////////////////////////////////////////////////
// Creates communicator, and the communicator_halo
////////////////////////////////////////////////////
_ndimension = processors.size(); _ndimension = processors.size();
_processor_coor.resize(_ndimension); _processor_coor.resize(_ndimension);

View File

@ -133,6 +133,7 @@ class SharedMemory
public: public:
SharedMemory() {}; SharedMemory() {};
~SharedMemory();
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// set the buffers & sizes // set the buffers & sizes
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////

View File

@ -182,6 +182,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
#ifdef GRID_MPI3_SHMMMAP #ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<<std::endl;
assert(_ShmSetup==1); assert(_ShmSetup==1);
assert(_ShmAlloc==0); assert(_ShmAlloc==0);
////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -218,6 +219,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0); assert(((uint64_t)ptr&0x3F)==0);
close(fd); close(fd);
WorldShmCommBufs[r] =ptr; WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
} }
_ShmAlloc=1; _ShmAlloc=1;
_ShmAllocBytes = bytes; _ShmAllocBytes = bytes;
@ -232,6 +234,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
assert(_ShmSetup==1); assert(_ShmSetup==1);
assert(_ShmAlloc==0); assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm); MPI_Barrier(WorldShmComm);
@ -259,7 +262,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif #endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); } std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
if ( ptr == (void * )MAP_FAILED ) {
perror("failed mmap");
assert(0);
}
assert(((uint64_t)ptr&0x3F)==0); assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr; WorldShmCommBufs[r] =ptr;
@ -318,11 +325,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
heap_size = GlobalSharedMemory::ShmAllocBytes(); heap_size = GlobalSharedMemory::ShmAllocBytes();
for(int r=0;r<ShmSize;r++){ for(int r=0;r<ShmSize;r++){
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ; uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm); MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr]; ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
} }
ShmBufferFreeAll(); ShmBufferFreeAll();
@ -391,5 +399,9 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
return (void *) remote; return (void *) remote;
} }
} }
SharedMemory::~SharedMemory()
{
MPI_Comm_free(&ShmComm);
};
} }

View File

@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
{ {
return NULL; return NULL;
} }
SharedMemory::~SharedMemory()
{};
} }

View File

@ -73,7 +73,7 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi
this->DW(psi,tmp_f,DaggerYes); this->DW(psi,tmp_f,DaggerYes);
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],tmp_f,s,s);// chi = (1-c[s] D_W) psi axpby_ssp(chi,Coeff_t(1.0),psi,conjugate(-cs[s]),tmp_f,s,s);// chi = (1-c[s] D_W) psi
} }
} }

View File

@ -469,7 +469,7 @@ void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionFie
} }
a0 = a0+incr; a0 = a0+incr;
a1 = a1+incr; a1 = a1+incr;
a2 = a2+sizeof(Simd::scalar_type); a2 = a2+sizeof(typename Simd::scalar_type);
}} }}
{ {
int lexa = s1+LLs*site; int lexa = s1+LLs*site;
@ -701,7 +701,7 @@ void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionFi
} }
a0 = a0+incr; a0 = a0+incr;
a1 = a1+incr; a1 = a1+incr;
a2 = a2+sizeof(Simd::scalar_type); a2 = a2+sizeof(typename Simd::scalar_type);
}} }}
{ {
int lexa = s1+LLs*site; int lexa = s1+LLs*site;

View File

@ -475,7 +475,7 @@ namespace QCD {
} }
a0 = a0 + incr; a0 = a0 + incr;
a1 = a1 + incr; a1 = a1 + incr;
a2 = a2 + sizeof(Simd::scalar_type); a2 = a2 + sizeof(typename Simd::scalar_type);
} }
} }

View File

@ -853,7 +853,7 @@ namespace QCD {
a0 = a0 + incr; a0 = a0 + incr;
a1 = a1 + incr; a1 = a1 + incr;
a2 = a2 + sizeof(Simd::scalar_type); a2 = a2 + sizeof(typename Simd::scalar_type);
} }
} }

View File

@ -556,7 +556,7 @@ namespace Optimization {
v3 = _mm256_add_epi32(v1, v2); v3 = _mm256_add_epi32(v1, v2);
v1 = _mm256_hadd_epi32(v3, v3); v1 = _mm256_hadd_epi32(v3, v3);
v2 = _mm256_hadd_epi32(v1, v1); v2 = _mm256_hadd_epi32(v1, v1);
u1 = _mm256_castsi256_si128(v2) // upper half u1 = _mm256_castsi256_si128(v2); // upper half
u2 = _mm256_extracti128_si256(v2, 1); // lower half u2 = _mm256_extracti128_si256(v2, 1); // lower half
ret = _mm_add_epi32(u1, u2); ret = _mm_add_epi32(u1, u2);
return _mm_cvtsi128_si32(ret); return _mm_cvtsi128_si32(ret);

72
lib/util/Profiling.h Normal file
View File

@ -0,0 +1,72 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/util/Profiling.h
Copyright (C) 2018
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_PERF_PROFILING_H
#define GRID_PERF_PROFILING_H
#include <sstream>
#include <iostream>
#include <functional>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
struct System
{
static void profile(const std::string& name,std::function<void()> body) {
std::string filename = name.find(".data") == std::string::npos ? (name + ".data") : name;
// Launch profiler
pid_t pid;
std::stringstream s;
s << getpid();
pid = fork();
if (pid == 0) {
auto fd=open("/dev/null",O_RDWR);
dup2(fd,1);
dup2(fd,2);
exit(execl("/usr/bin/perf","perf","record","-o",filename.c_str(),"-p",s.str().c_str(),nullptr));
}
// Run body
body();
// Kill profiler
kill(pid,SIGINT);
waitpid(pid,nullptr,0);
}
static void profile(std::function<void()> body) {
profile("perf.data",body);
}
};
#endif // GRID_PERF_PROFILING_H

View File

@ -111,6 +111,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage<<"Error "<<norm2(err)<<std::endl; std::cout<<GridLogMessage<<"Error "<<norm2(err)<<std::endl;
const int nbasis = 2; const int nbasis = 2;
const int cb = 0 ;
LatticeFermion prom(FGrid); LatticeFermion prom(FGrid);
std::vector<LatticeFermion> subspace(nbasis,FGrid); std::vector<LatticeFermion> subspace(nbasis,FGrid);
@ -119,7 +120,7 @@ int main (int argc, char ** argv)
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf); MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
Subspace Aggregates(Coarse5d,FGrid); Subspace Aggregates(Coarse5d,FGrid,cb);
Aggregates.CreateSubspaceRandom(RNG5); Aggregates.CreateSubspaceRandom(RNG5);
subspace=Aggregates.subspace; subspace=Aggregates.subspace;

View File

@ -78,6 +78,7 @@ int main (int argc, char ** argv)
RealD mass=0.1; RealD mass=0.1;
RealD M5=1.5; RealD M5=1.5;
int cb=0;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl; std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
@ -95,7 +96,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl; std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf); MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
Subspace Aggregates(Coarse5d,FGrid); Subspace Aggregates(Coarse5d,FGrid,cb);
Aggregates.CreateSubspace(RNG5,HermDefOp); Aggregates.CreateSubspace(RNG5,HermDefOp);

View File

@ -56,12 +56,12 @@ public:
void checkpointFine(std::string evecs_file,std::string evals_file) void checkpointFine(std::string evecs_file,std::string evals_file)
{ {
assert(this->_Aggregate.subspace.size()==nbasis); assert(this->subspace.size()==nbasis);
emptyUserRecord record; emptyUserRecord record;
Grid::QCD::ScidacWriter WR; Grid::QCD::ScidacWriter WR;
WR.open(evecs_file); WR.open(evecs_file);
for(int k=0;k<nbasis;k++) { for(int k=0;k<nbasis;k++) {
WR.writeScidacFieldRecord(this->_Aggregate.subspace[k],record); WR.writeScidacFieldRecord(this->subspace[k],record);
} }
WR.close(); WR.close();
@ -72,7 +72,7 @@ public:
void checkpointFineRestore(std::string evecs_file,std::string evals_file) void checkpointFineRestore(std::string evecs_file,std::string evals_file)
{ {
this->evals_fine.resize(nbasis); this->evals_fine.resize(nbasis);
this->_Aggregate.subspace.resize(nbasis,this->_FineGrid); this->subspace.resize(nbasis,this->_FineGrid);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl; std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file); XmlReader RDx(evals_file);
@ -85,8 +85,8 @@ public:
Grid::QCD::ScidacReader RD ; Grid::QCD::ScidacReader RD ;
RD.open(evecs_file); RD.open(evecs_file);
for(int k=0;k<nbasis;k++) { for(int k=0;k<nbasis;k++) {
this->_Aggregate.subspace[k].checkerboard=this->_checkerboard; this->subspace[k].checkerboard=this->_checkerboard;
RD.readScidacFieldRecord(this->_Aggregate.subspace[k],record); RD.readScidacFieldRecord(this->subspace[k],record);
} }
RD.close(); RD.close();
@ -221,7 +221,9 @@ int main (int argc, char ** argv) {
std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl; std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml")); _LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
_LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
_LocalCoherenceLanczos.Orthogonalise(); _LocalCoherenceLanczos.Orthogonalise();
std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
} }
if ( Params.doFineRead ) { if ( Params.doFineRead ) {
@ -231,8 +233,6 @@ int main (int argc, char ** argv) {
} }
if ( Params.doCoarse ) { if ( Params.doCoarse ) {
std::cout << GridLogMessage << "Orthogonalising " << nbasis<<" Nm "<<Nm2<< std::endl;
std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl; std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl;
_LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol, _LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol,
coarse.Nstop, coarse.Nk,coarse.Nm, coarse.Nstop, coarse.Nk,coarse.Nm,