1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-16 14:57:05 +01:00

Merge branch 'develop' into feature/CG_repro

This commit is contained in:
Guido Cossu
2016-11-09 14:44:46 +00:00
20 changed files with 251 additions and 125 deletions

View File

@ -3,8 +3,8 @@ SUBDIRS = lib benchmarks tests
.PHONY: tests
tests:
make -C tests tests
tests: all
$(MAKE) -C tests tests
AM_CXXFLAGS += -I$(top_builddir)/include
ACLOCAL_AMFLAGS = -I m4

44
README
View File

@ -1,44 +0,0 @@
This library provides data parallel C++ container classes with internal memory layout
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
array indices to both MPI tasks and SIMD processing elements.
* Identically shaped arrays then be processed with perfect data parallelisation.
* Such identically shapped arrays are called conformable arrays.
The transformation is based on the observation that Cartesian array processing involves
identical processing to be performed on different regions of the Cartesian array.
The library will (eventually) both geometrically decompose into MPI tasks and across SIMD lanes.
Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but
optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification
for most programmers.
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
These are presented as
vRealF, vRealD, vComplexF, vComplexD
internal vector data types. These may be useful in themselves for other programmers.
The corresponding scalar types are named
RealF, RealD, ComplexF, ComplexD
MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is present in the library.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
is are examples:
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none

1
README Symbolic link
View File

@ -0,0 +1 @@
README.md

View File

@ -126,7 +126,7 @@ If you want to build all the tests at once just use `make tests`.
### Possible communication interfaces
The following options can be use with the `--enable-simd=` option to target different communication interfaces:
The following options can be use with the `--enable-comms=` option to target different communication interfaces:
| `<comm>` | Description |
| -------------- | ------------------------------------------------------------- |

View File

@ -193,6 +193,7 @@ int main (int argc, char ** argv)
}
}
Nloop=100;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
@ -271,5 +272,90 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
Nloop=100;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
Grid.ShmBufferFreeAll();
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
for(int i=0;i<Nloop;i++){
std::vector<CartesianCommunicator::CommsRequest_t> requests;
ncomm=0;
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
// Grid.StencilSendToRecvFromComplete(requests);
// requests.resize(0);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
Grid.StencilSendToRecvFromComplete(requests);
requests.resize(0);
}
}
Grid.Barrier();
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start; // microseconds
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
Grid_finalize();
}

View File

@ -57,7 +57,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt4 = GridDefaultLatt();
const int Ls=16;
const int Ls=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@ -138,7 +138,7 @@ int main (int argc, char ** argv)
int ncall =100;
if (1) {
FGrid->Barrier();
Dw.ZeroCounters();
double t0=usecond();
for(int i=0;i<ncall;i++){
@ -147,6 +147,7 @@ int main (int argc, char ** argv)
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
@ -158,7 +159,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert (norm2(err)< 1.0e-5 );
assert (norm2(err)< 1.0e-4 );
Dw.Report();
}
@ -193,6 +194,7 @@ int main (int argc, char ** argv)
pokeSite(tmp,ssrc,site);
}}}}}
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
FGrid->Barrier();
double t0=usecond();
sDw.ZeroCounters();
for(int i=0;i<ncall;i++){
@ -201,6 +203,7 @@ int main (int argc, char ** argv)
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
@ -211,12 +214,12 @@ int main (int argc, char ** argv)
if(0){
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
sDw.Dhop(ssrc,sresult,0);
PerformanceCounter Counter(i);
Counter.Start();
sDw.Dhop(ssrc,sresult,0);
Counter.Stop();
Counter.Report();
sDw.Dhop(ssrc,sresult,0);
PerformanceCounter Counter(i);
Counter.Start();
sDw.Dhop(ssrc,sresult,0);
Counter.Stop();
Counter.Report();
}
}
@ -240,7 +243,7 @@ int main (int argc, char ** argv)
}
}}}}}
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
assert (sum< 1.0e-5 );
assert (sum< 1.0e-4 );
if (1) {
@ -271,6 +274,7 @@ int main (int argc, char ** argv)
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
FGrid->Barrier();
sDw.ZeroCounters();
sDw.stat.init("DhopEO");
double t0=usecond();
@ -278,6 +282,7 @@ int main (int argc, char ** argv)
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
}
double t1=usecond();
FGrid->Barrier();
sDw.stat.print();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
@ -301,7 +306,7 @@ int main (int argc, char ** argv)
error+= norm2(ssrc_o);
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
if(error>1.0e-5) {
if(error>1.0e-4) {
setCheckerboard(ssrc,ssrc_o);
setCheckerboard(ssrc,ssrc_e);
std::cout<< ssrc << std::endl;
@ -337,7 +342,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert(norm2(err)<1.0e-5);
assert(norm2(err)<1.0e-4);
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
LatticeFermion r_e (FrbGrid);
@ -363,11 +368,13 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
{
Dw.ZeroCounters();
FGrid->Barrier();
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
@ -389,14 +396,14 @@ int main (int argc, char ** argv)
err = r_eo-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert(norm2(err)<1.0e-5);
assert(norm2(err)<1.0e-4);
pickCheckerboard(Even,src_e,err);
pickCheckerboard(Odd,src_o,err);
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
assert(norm2(src_e)<1.0e-5);
assert(norm2(src_o)<1.0e-5);
assert(norm2(src_e)<1.0e-4);
assert(norm2(src_o)<1.0e-4);
Grid_finalize();
}

View File

@ -69,8 +69,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
int Lmax=32;
int dmin=0;
int Lmax=16;
int dmin=2;
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
for (int L=8;L<=Lmax;L*=2){

View File

@ -29,9 +29,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef _GRID_FFT_H_
#define _GRID_FFT_H_
#ifdef HAVE_FFTW
#ifdef HAVE_FFTW
#ifdef USE_MKL
#include <fftw/fftw3.h>
#else
#include <fftw3.h>
#endif
#endif
namespace Grid {
@ -122,7 +126,8 @@ namespace Grid {
double Flops(void) {return flops;}
double MFlops(void) {return flops/usec;}
double USec(void) {return (double)usec;}
FFT ( GridCartesian * grid ) :
vgrid(grid),
Nd(grid->_ndimension),

View File

@ -369,7 +369,7 @@ void Grid_init(int *argc,char ***argv)
void Grid_finalize(void)
{
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
MPI_Finalize();
Grid_unquiesce_nodes();
#endif

View File

@ -93,7 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
////////////////////////////////////////////////////////////
void Grid_quiesce_nodes(void) {
int me = 0;
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3)
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L)
MPI_Comm_rank(MPI_COMM_WORLD, &me);
#endif
#ifdef GRID_COMMS_SHMEM

View File

@ -43,6 +43,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#else
#include <sys/syscall.h>
#endif
#ifdef __x86_64__
#include <x86intrin.h>
#endif
namespace Grid {
@ -86,7 +89,6 @@ inline uint64_t cyclecount(void){
return tmp;
}
#elif defined __x86_64__
#include <x86intrin.h>
inline uint64_t cyclecount(void){
return __rdtsc();
// unsigned int dummy;

View File

@ -191,7 +191,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
<< LinalgTimer.Elapsed();
std::cout << std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
if (!CGState.do_repro && ReproTest){
CGState.do_repro = true;

View File

@ -97,7 +97,7 @@ void CartesianCommunicator::Barrier(void){}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ assert(0);}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor ;}
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
source =0;

View File

@ -10,6 +10,7 @@
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -53,24 +54,26 @@ WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,
}
#if defined(AVX512)
#include <simd/Intel512wilson.h>
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h>
#include <simd/Intel512single.h>
static Vector<vComplexF> signs;
int setupSigns(void ){
Vector<vComplexF> bother(2);
static Vector<vComplexF> signsF;
template<typename vtype>
int setupSigns(Vector<vtype>& signs ){
Vector<vtype> bother(2);
signs = bother;
vrsign(signs[0]);
visign(signs[1]);
return 1;
}
static int signInit = setupSigns();
static int signInitF = setupSigns(signsF);
#define label(A) ilabel(A)
#define ilabel(A) ".globl\n" #A ":\n"
@ -78,6 +81,8 @@ static Vector<vComplexF> signs;
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A
#define COMPLEX_TYPE vComplexF
#define signs signsF
#undef KERNEL_DAG
template<> void
@ -98,8 +103,8 @@ WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder
#undef FX
#define FX(A) DWFASM_ ## A
#define MAYBEPERM(A,B)
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
//#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
//#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
#undef KERNEL_DAG
@ -113,8 +118,71 @@ template<> void
WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_TYPE
#undef signs
#undef VMOVRDUP
#undef MAYBEPERM
#undef MULT_2SPIN
#undef FX
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the double precision routine
///////////////////////////////////////////////////////////
#include <simd/Intel512double.h>
static Vector<vComplexD> signsD;
#define signs signsD
static int signInitD = setupSigns(signsD);
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A
#define COMPLEX_TYPE vComplexD
#undef KERNEL_DAG
template<> void
WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<> void
WilsonKernels<WilsonImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif
#undef VMOVIDUP
#undef VMOVRDUP
#undef MAYBEPERM
#undef MULT_2SPIN
#undef FX
#define FX(A) DWFASM_ ## A
#define MAYBEPERM(A,B)
//#define VMOVIDUP(A,B,C) VBCASTIDUPd(A,B,C)
//#define VMOVRDUP(A,B,C) VBCASTRDUPd(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
#undef KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<> void
WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_TYPE
#undef signs
#undef VMOVRDUP
#undef MAYBEPERM
#undef MULT_2SPIN
#undef FX
#endif //AVX512
#define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\

View File

@ -5,7 +5,9 @@
const uint64_t plocal =(uint64_t) & in._odata[0];
// vComplexF isigns[2] = { signs[0], signs[1] };
vComplexF *isigns = &signs[0];
//COMPLEX_TYPE is vComplexF of vComplexD depending
//on the chosen precision
COMPLEX_TYPE *isigns = &signs[0];
MASK_REGS;
int nmax=U._grid->oSites();

View File

@ -116,7 +116,7 @@ class NerscHmcRunnerTemplate {
NoSmearing<Gimpl> SmearingPolicy;
typedef MinimumNorm2<GaugeField, NoSmearing<Gimpl>, RepresentationsPolicy >
IntegratorType; // change here to change the algorithm
IntegratorParameters MDpar(20, 1.0);
IntegratorParameters MDpar(40, 1.0);
IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy);
// Checkpoint strategy

View File

@ -382,7 +382,6 @@ namespace Optimization {
// Some Template specialization
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
#ifndef __INTEL_COMPILER
#warning "Slow reduction due to incomplete reduce intrinsics"
//Complex float Reduce

View File

@ -9,4 +9,4 @@ endif
include Make.inc
subtests:
for d in $(SUBDIRS); do make -C $${d} tests; done
for d in $(SUBDIRS); do $(MAKE) -C $${d} tests; done

View File

@ -42,7 +42,7 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
for(int mu=0;mu<Nd;mu++){
// ImplComplex cmi(0.0,-1.0);
ComplexD cmi(0.0,-1.0);
Complex cmi(0.0,-1.0);
A[mu] = Ta(U[mu]) * cmi;
}
}
@ -52,13 +52,13 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
}
}
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,RealD & alpha,int maxiter,RealD Omega_tol, RealD Phi_tol) {
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol) {
GridBase *grid = Umu._grid;
RealD org_plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
RealD org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
RealD old_trace = org_link_trace;
RealD trG;
Real org_plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
Real old_trace = org_link_trace;
Real trG;
std::vector<GaugeMat> U(Nd,grid);
GaugeMat dmuAmu(grid);
@ -71,13 +71,13 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
// Monitor progress and convergence test
// infrequently to minimise cost overhead
if ( i %20 == 0 ) {
RealD plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
RealD link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
Real plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
RealD Phi = 1.0 - old_trace / link_trace ;
RealD Omega= 1.0 - trG;
Real Phi = 1.0 - old_trace / link_trace ;
Real Omega= 1.0 - trG;
std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl;
@ -91,7 +91,7 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
}
}
};
static RealD SteepestDescentStep(std::vector<GaugeMat> &U,RealD & alpha, GaugeMat & dmuAmu) {
static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
GridBase *grid = U[0]._grid;
std::vector<GaugeMat> A(Nd,grid);
@ -101,26 +101,26 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);
RealD vol = grid->gSites();
RealD trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
Real vol = grid->gSites();
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static RealD FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,RealD & alpha, GaugeMat & dmuAmu) {
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
GridBase *grid = U[0]._grid;
RealD vol = grid->gSites();
Real vol = grid->gSites();
FFT theFFT((GridCartesian *)grid);
LatticeComplex Fp(grid);
LatticeComplex psq(grid); psq=zero;
LatticeComplex pmu(grid);
LatticeComplex one(grid); one = ComplexD(1.0,0.0);
LatticeComplex one(grid); one = Complex(1.0,0.0);
GaugeMat g(grid);
GaugeMat dmuAmu_p(grid);
@ -139,13 +139,13 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
std::vector<int> coor(grid->_ndimension,0);
for(int mu=0;mu<Nd;mu++) {
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(pmu,mu);
pmu = TwoPiL * pmu ;
psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);
}
ComplexD psqMax(16.0);
Complex psqMax(16.0);
Fp = psqMax*one/psq;
static int once;
@ -160,20 +160,20 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);
GaugeMat ciadmam(grid);
ComplexD cialpha(0.0,-alpha);
Complex cialpha(0.0,-alpha);
ciadmam = dmuAmu*cialpha;
SU<Nc>::taExp(ciadmam,g);
RealD trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,RealD & alpha, GaugeMat &dmuAmu) {
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
GridBase *grid = g._grid;
ComplexD cialpha(0.0,-alpha);
Complex cialpha(0.0,-alpha);
GaugeMat ciadmam(grid);
DmuAmu(A,dmuAmu);
ciadmam = dmuAmu*cialpha;
@ -193,11 +193,11 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
ComplexField pha(grid);
GaugeMat Apha(grid);
ComplexD ci(0.0,1.0);
Complex ci(0.0,1.0);
for(int mu=0;mu<Nd;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(pmu,mu);
pmu = TwoPiL * pmu ;
pha = exp(pmu * (0.5 *ci)); // e(ipmu/2) since Amu(x+mu/2)
@ -213,14 +213,14 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
ComplexField pmu(grid);
ComplexField pha(grid);
ComplexD ci(0.0,1.0);
Complex ci(0.0,1.0);
// Sign convention for FFTW calls:
// A(x)= Sum_p e^ipx A(p) / V
// A(p)= Sum_p e^-ipx A(x)
for(int mu=0;mu<Nd;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(pmu,mu);
pmu = TwoPiL * pmu ;
pha = exp(-pmu * (0.5 *ci)); // e(+ipmu/2) since Amu(x+mu/2)
@ -241,7 +241,7 @@ int main (int argc, char ** argv)
int threads = GridThread::GetThreads();
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
std::vector<int> simd_layout( { vComplex::Nsimd(),1,1,1});
std::vector<int> mpi_layout = GridDefaultMpi();
int vol = 1;
@ -261,25 +261,25 @@ int main (int argc, char ** argv)
std::cout<< "* Testing we can gauge fix steep descent a RGT of Unit gauge *" <<std::endl;
std::cout<< "*****************************************************************" <<std::endl;
LatticeGaugeFieldD Umu(&GRID);
LatticeGaugeFieldD Uorg(&GRID);
LatticeColourMatrixD g(&GRID); // Gauge xform
LatticeGaugeField Umu(&GRID);
LatticeGaugeField Uorg(&GRID);
LatticeColourMatrix g(&GRID); // Gauge xform
SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
Uorg=Umu;
SU3::RandomGaugeTransform(pRNG,Umu,g); // Unit gauge
RealD plaq=WilsonLoops<PeriodicGimplD>::avgPlaquette(Umu);
Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
std::cout << " Initial plaquette "<<plaq << std::endl;
RealD alpha=0.1;
FourierAcceleratedGaugeFixer<PeriodicGimplD>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10);
Real alpha=0.1;
FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10);
plaq=WilsonLoops<PeriodicGimplD>::avgPlaquette(Umu);
plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
std::cout << " Final plaquette "<<plaq << std::endl;
Uorg = Uorg - Umu;

View File

@ -93,10 +93,10 @@ int main (int argc, char ** argv)
C=C-Ctilde;
std::cout << "diff scalar "<<norm2(C) << std::endl;
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<< " "<<theFFT.USec() <<std::endl;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<< " "<<theFFT.USec() <<std::endl;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<< " "<<theFFT.USec() <<std::endl;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<" "<<theFFT.USec() <<std::endl;
SpinMatrixF Sp;
Sp = zero; Sp = Sp+cVol;

View File

@ -68,7 +68,7 @@ class HmcRunner : public NerscHmcRunner {
TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
// Set smearing (true/false), default: false
Nf2.is_smeared = true;
Nf2.is_smeared = false;
// Collect actions
ActionLevel<LatticeGaugeField> Level1(1);