From 10f2872aae87626df0d2ec2a591df88bfbfa68a0 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 15 May 2017 15:51:16 +0100 Subject: [PATCH 01/13] Faster exponentiation for lattice fields --- lib/lattice/Lattice_unary.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index f5b324ec..0afeaa32 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -66,10 +66,21 @@ namespace Grid { Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); + obj unit(1.0); parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); + //ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); + ret._odata[ss] = unit; + for(int i=Nexp; i>=1;--i) + ret._odata[ss] = unit + ret._odata[ss]*rhs._odata[ss]*(alpha/RealD(i)); + } + return ret; + + + + + } From bc862ce3ab7b642fba1874f65d867fcbc61c8df2 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 18 May 2017 14:44:56 +0100 Subject: [PATCH 02/13] Fixing an allocation issue in Benchmark_comms --- benchmarks/Benchmark_comms.cc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 99ab190b..a49e3033 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -40,15 +40,17 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout<1) nmu++; + + std::cout< xbuf(8); std::vector rbuf(8); + Grid.ShmInitGeneric(); Grid.ShmBufferFreeAll(); for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); @@ -269,13 +271,11 @@ int main (int argc, char ** argv) double time = stop-start; // microseconds - std::cout< xbuf(8); std::vector rbuf(8); + Grid.ShmInitGeneric(); Grid.ShmBufferFreeAll(); for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); @@ -353,7 +354,7 @@ int main (int argc, char ** argv) double time = stop-start; // microseconds - std::cout< Date: Thu, 18 May 2017 17:48:11 +0100 Subject: [PATCH 03/13] Reverting change in Bechmark_comms. Keeping 300 iterations --- benchmarks/Benchmark_comms.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index a49e3033..319f01dc 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -40,7 +40,7 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout<1) nmu++; @@ -213,7 +213,6 @@ int main (int argc, char ** argv) std::vector xbuf(8); std::vector rbuf(8); - Grid.ShmInitGeneric(); Grid.ShmBufferFreeAll(); for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); @@ -293,7 +292,6 @@ int main (int argc, char ** argv) std::vector xbuf(8); std::vector rbuf(8); - Grid.ShmInitGeneric(); Grid.ShmBufferFreeAll(); for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); From 8e19c99c7d87bd4af0e9ecb9dd72c68f5368f891 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 18 May 2017 19:07:35 +0100 Subject: [PATCH 04/13] Adding more statistical info in the Benchmark_comms --- benchmarks/Benchmark_comms.cc | 69 +++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 319f01dc..d4c59785 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -31,6 +31,17 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; +void statistics(std::vector v, double &mean, double &std_err){ + double sum = std::accumulate(v.begin(), v.end(), 0.0); + mean = sum / v.size(); + + std::vector diff(v.size()); + std::transform(v.begin(), v.end(), diff.begin(), [mean](double x) { return x - mean; }); + double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); + std_err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); +} + + int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -40,12 +51,14 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout<1) nmu++; - + std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; + std::vector t_time(Nloop); + double mean_time, std_err_time; std::cout< requests; @@ -104,18 +117,21 @@ int main (int argc, char ** argv) } Grid.SendToRecvFromComplete(requests); Grid.Barrier(); - + double stop=usecond(); + t_time[i] = stop-start; // microseconds } - double stop=usecond(); + + statistics(t_time, mean_time, std_err_time); double dbytes = bytes; - double xbytes = Nloop*dbytes*2.0*ncomm; + double xbytes = dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; - double time = stop-start; // microseconds + std::cout< requests; @@ -259,18 +278,19 @@ int main (int argc, char ** argv) } Grid.StencilSendToRecvFromComplete(requests); Grid.Barrier(); + double stop=usecond(); + t_time[i] = stop-start; // microseconds } - double stop=usecond(); double dbytes = bytes; double xbytes = Nloop*dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; - double time = stop-start; // microseconds - - std::cout< requests; @@ -340,19 +360,22 @@ int main (int argc, char ** argv) } } - Grid.Barrier(); + Grid.Barrier(); + double stop=usecond(); + t_time[i] = stop-start; // microseconds } - double stop=usecond(); double dbytes = bytes; double xbytes = Nloop*dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; - double time = stop-start; // microseconds - std::cout< Date: Fri, 19 May 2017 10:55:04 +0100 Subject: [PATCH 05/13] Adding more statistics to the Benchmark_comms. Min and max --- benchmarks/Benchmark_comms.cc | 88 ++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index d4c59785..ce881ef6 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -31,16 +31,31 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -void statistics(std::vector v, double &mean, double &std_err){ +struct time_statistics{ + double mean; + double err; + double min; + double max; + + void statistics(std::vector v){ double sum = std::accumulate(v.begin(), v.end(), 0.0); mean = sum / v.size(); std::vector diff(v.size()); - std::transform(v.begin(), v.end(), diff.begin(), [mean](double x) { return x - mean; }); + std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); - std_err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); -} + err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); + auto result = std::minmax_element(v.begin(), v.end()); + min = *result.first; + max = *result.second; +} +}; + +void header(){ + std::cout < t_time(Nloop); - double mean_time, std_err_time; + time_statistics timestat; std::cout< Date: Fri, 19 May 2017 16:39:36 +0100 Subject: [PATCH 06/13] Fixing a compilation error for generic SIMD --- lib/simd/Grid_generic.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/simd/Grid_generic.h b/lib/simd/Grid_generic.h index bdf5aa01..e1d5f894 100644 --- a/lib/simd/Grid_generic.h +++ b/lib/simd/Grid_generic.h @@ -281,8 +281,8 @@ namespace Optimization { struct PrecisionChange { static inline vech StoH (const vecf &a,const vecf &b) { -#ifdef USE_FP16 vech ret; +#ifdef USE_FP16 vech *ha = (vech *)&a; vech *hb = (vech *)&b; const int nf = W::r; @@ -493,6 +493,8 @@ namespace Optimization { return a; } + + #undef acc // EIGEN compatibility } ////////////////////////////////////////////////////////////////////////////////////// From ab3596d4d368c9112a3d169d3cd8459d0f072f05 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 25 May 2017 12:07:47 +0100 Subject: [PATCH 07/13] Using Cayley-Hamilton form for the exponential of SU(3) matrices --- lib/lattice/Lattice_unary.h | 9 +-- lib/qcd/action/gauge/GaugeImplTypes.h | 21 ++++-- lib/qcd/smearing/StoutSmearing.h | 2 + lib/tensors/Tensor_exp.h | 103 ++++++++++++++++++++++---- 4 files changed, 106 insertions(+), 29 deletions(-) diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index 0afeaa32..44b7b4f1 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -62,17 +62,12 @@ namespace Grid { return ret; } - template Lattice expMat(const Lattice &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ + template Lattice expMat(const Lattice &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); - obj unit(1.0); parallel_for(int ss=0;ssoSites();ss++){ - //ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); - ret._odata[ss] = unit; - for(int i=Nexp; i>=1;--i) - ret._odata[ss] = unit + ret._odata[ss]*rhs._odata[ss]*(alpha/RealD(i)); - + ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); } return ret; diff --git a/lib/qcd/action/gauge/GaugeImplTypes.h b/lib/qcd/action/gauge/GaugeImplTypes.h index 9d36eead..29e79581 100644 --- a/lib/qcd/action/gauge/GaugeImplTypes.h +++ b/lib/qcd/action/gauge/GaugeImplTypes.h @@ -59,7 +59,7 @@ public: typedef iImplGaugeLink SiteLink; typedef iImplGaugeField SiteField; - typedef Lattice LinkField; + typedef Lattice LinkField; typedef Lattice Field; // Guido: we can probably separate the types from the HMC functions @@ -80,7 +80,7 @@ public: /////////////////////////////////////////////////////////// // Move these to another class - // HMC auxiliary functions + // HMC auxiliary functions static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { // specific for SU gauge fields LinkField Pmu(P._grid); @@ -92,14 +92,19 @@ public: } static inline Field projectForce(Field &P) { return Ta(P); } - + static inline void update_field(Field& P, Field& U, double ep){ - for (int mu = 0; mu < Nd; mu++) { - auto Umu = PeekIndex(U, mu); - auto Pmu = PeekIndex(P, mu); - Umu = expMat(Pmu, ep, Nexp) * Umu; - PokeIndex(U, ProjectOnGroup(Umu), mu); + //static std::chrono::duration diff; + + //auto start = std::chrono::high_resolution_clock::now(); + parallel_for(int ss=0;ssoSites();ss++){ + for (int mu = 0; mu < Nd; mu++) + U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]); } + + //auto end = std::chrono::high_resolution_clock::now(); + // diff += end - start; + // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n"; } static inline RealD FieldSquareNorm(Field& U){ diff --git a/lib/qcd/smearing/StoutSmearing.h b/lib/qcd/smearing/StoutSmearing.h index b50ffe21..bfc37d0d 100644 --- a/lib/qcd/smearing/StoutSmearing.h +++ b/lib/qcd/smearing/StoutSmearing.h @@ -58,6 +58,8 @@ class Smear_Stout : public Smear { SmearBase->smear(C, U); }; + + // Repetion of code here (use the Tensor_exp.h function) void exponentiate_iQ(GaugeLinkField& e_iQ, const GaugeLinkField& iQ) const { // Put this outside // only valid for SU(3) matrices diff --git a/lib/tensors/Tensor_exp.h b/lib/tensors/Tensor_exp.h index ad0d2071..64c06bb5 100644 --- a/lib/tensors/Tensor_exp.h +++ b/lib/tensors/Tensor_exp.h @@ -37,30 +37,105 @@ namespace Grid { /////////////////////////////////////////////// - template inline iScalar Exponentiate(const iScalar&r, ComplexD alpha , Integer Nexp = DEFAULT_MAT_EXP) + template inline iScalar Exponentiate(const iScalar&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP) { iScalar ret; ret._internal = Exponentiate(r._internal, alpha, Nexp); return ret; } - - template::TensorLevel == 0 >::type * =nullptr> - inline iMatrix Exponentiate(const iMatrix &arg, ComplexD alpha , Integer Nexp = DEFAULT_MAT_EXP ) +template inline iVector Exponentiate(const iVector&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP) { - iMatrix unit(1.0); - iMatrix temp(unit); - - for(int i=Nexp; i>=1;--i){ - temp *= alpha/ComplexD(i); - temp = unit + temp*arg; - } - - return temp; - + iVector ret; + for (int i = 0; i < N; i++) + ret._internal[i] = Exponentiate(r._internal[i], alpha, Nexp); + return ret; } + // Specialisation: Cayley-Hamilton exponential for SU(3) + template::TensorLevel == 0>::type * =nullptr> + inline iMatrix Exponentiate(const iMatrix &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) + { + // for SU(3) 2x faster than the std implementation using Nexp=12 + // notice that it actually computes + // exp ( input matrix ) + // the i sign is coming from outside + // input matrix is anti-hermitian NOT hermitian + typedef iMatrix mat; + typedef iScalar scalar; + mat unit(1.0); + mat temp(unit); + const Complex one_over_three = 1.0 / 3.0; + const Complex one_over_two = 1.0 / 2.0; + + scalar c0, c1, tmp, c0max, theta, u, w; + scalar xi0, u2, w2, cosw; + scalar fden, h0, h1, h2; + scalar e2iu, emiu, ixi0, qt; + scalar f0, f1, f2; + scalar unity(1.0); + + mat iQ2 = arg*arg*alpha*alpha; + mat iQ3 = arg*iQ2*alpha; + // sign in c0 from the conventions on the Ta + c0 = -imag( trace(iQ3) ) * one_over_three; + c1 = -real( trace(iQ2) ) * one_over_two; + + // Cayley Hamilton checks to machine precision, tested + tmp = c1 * one_over_three; + c0max = 2.0 * pow(tmp, 1.5); + + theta = acos(c0 / c0max) * one_over_three; + u = sqrt(tmp) * cos(theta); + w = sqrt(c1) * sin(theta); + + xi0 = sin(w) / w; + u2 = u * u; + w2 = w * w; + cosw = cos(w); + + ixi0 = timesI(xi0); + emiu = cos(u) - timesI(sin(u)); + e2iu = cos(2.0 * u) + timesI(sin(2.0 * u)); + + h0 = e2iu * (u2 - w2) + + emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0)); + h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0); + h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0); + + fden = unity / (9.0 * u2 - w2); // reals + f0 = h0 * fden; + f1 = h1 * fden; + f2 = h2 * fden; + + return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2); + } + + + +// General exponential +template::TensorLevel == 0 >::type * =nullptr> + inline iMatrix Exponentiate(const iMatrix &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) + { + // notice that it actually computes + // exp ( input matrix ) + // the i sign is coming from outside + // input matrix is anti-hermitian NOT hermitian + typedef iMatrix mat; + mat unit(1.0); + mat temp(unit); + for(int i=Nexp; i>=1;--i){ + temp *= alpha/ComplexD(i); + temp = unit + temp*arg; + } + return temp; + + } + + + + } #endif From 3c112a7a25eebf5d1ae611f900d0d65057336fb3 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 25 May 2017 12:09:00 +0100 Subject: [PATCH 08/13] Small correction to the general exp definition --- lib/tensors/Tensor_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tensors/Tensor_exp.h b/lib/tensors/Tensor_exp.h index 64c06bb5..2f584134 100644 --- a/lib/tensors/Tensor_exp.h +++ b/lib/tensors/Tensor_exp.h @@ -127,7 +127,7 @@ template::Tens mat unit(1.0); mat temp(unit); for(int i=Nexp; i>=1;--i){ - temp *= alpha/ComplexD(i); + temp *= alpha/ReadD(i); temp = unit + temp*arg; } return temp; From 75856f29454eaa3cfd7f002fa90ebd074ba663f4 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 25 May 2017 12:44:56 +0100 Subject: [PATCH 09/13] Compilation fix in the Tensor_exp --- lib/tensors/Tensor_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tensors/Tensor_exp.h b/lib/tensors/Tensor_exp.h index 2f584134..e18fed70 100644 --- a/lib/tensors/Tensor_exp.h +++ b/lib/tensors/Tensor_exp.h @@ -127,7 +127,7 @@ template::Tens mat unit(1.0); mat temp(unit); for(int i=Nexp; i>=1;--i){ - temp *= alpha/ReadD(i); + temp *= alpha/RealD(i); temp = unit + temp*arg; } return temp; From a74c34315c79c4b52871e87b312943174dbaf1e2 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 25 May 2017 14:27:49 +0100 Subject: [PATCH 10/13] Bootstrap script fix --- bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bootstrap.sh b/bootstrap.sh index 66eb63ee..dfb6735d 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -1,4 +1,4 @@ -]#!/usr/bin/env bash +#!/usr/bin/env bash EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' From f4e8bf28589d2629364b7e4a3279d76879ef818a Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 26 May 2017 12:45:59 +0100 Subject: [PATCH 11/13] Fixing the topological charge. Wilson Flow tested, ok --- lib/qcd/smearing/WilsonFlow.h | 18 +++++++++++++++--- lib/qcd/utils/WilsonLoops.h | 26 ++++++++++++++++++++------ tests/smearing/Test_WilsonFlow.cc | 11 +++++++++-- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/lib/qcd/smearing/WilsonFlow.h b/lib/qcd/smearing/WilsonFlow.h index 8b8f9a81..34800d11 100644 --- a/lib/qcd/smearing/WilsonFlow.h +++ b/lib/qcd/smearing/WilsonFlow.h @@ -36,8 +36,10 @@ namespace QCD { template class WilsonFlow: public Smear{ unsigned int Nstep; + unsigned int measure_interval; RealD epsilon; + mutable WilsonGaugeAction SG; void evolve_step(typename Gimpl::GaugeField&) const; @@ -47,9 +49,10 @@ class WilsonFlow: public Smear{ public: INHERIT_GIMPL_TYPES(Gimpl) - explicit WilsonFlow(unsigned int Nstep, RealD epsilon): + explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1): Nstep(Nstep), epsilon(epsilon), + measure_interval(interval), SG(WilsonGaugeAction(3.0)) { // WilsonGaugeAction with beta 3.0 assert(epsilon > 0.0); @@ -107,11 +110,20 @@ RealD WilsonFlow::energyDensityPlaquette(unsigned int step, const GaugeFi template void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const { out = in; - for (unsigned int step = 0; step < Nstep; step++) { + for (unsigned int step = 1; step <= Nstep; step++) { + auto start = std::chrono::high_resolution_clock::now(); evolve_step(out); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + std::cout << "Time to evolve " << diff.count() << " s\n"; std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " + << step << " " << energyDensityPlaquette(step,out) << std::endl; + if( step % measure_interval == 0){ + std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " + << step << " " + << WilsonLoops::TopologicalCharge(out) << std::endl; + } } } diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 5382882e..77b5db95 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -197,10 +197,13 @@ public: std::vector U(Nd, grid); for (int d = 0; d < Nd; d++) { + // this operation is taking too much time U[d] = PeekIndex(Umu, d); } staple = zero; - GaugeMat tmp(grid); + GaugeMat tmp1(grid); + GaugeMat tmp2(grid); + for (int nu = 0; nu < Nd; nu++) { @@ -214,22 +217,34 @@ public: // | // __| // + tmp1 = Cshift(U[nu], mu, 1); + tmp2 = Cshift(U[mu], nu, 1); + staple += tmp1*adj(U[nu] * tmp2); + + +/* staple += Gimpl::ShiftStaple( Gimpl::CovShiftForward( U[nu], nu, Gimpl::CovShiftBackward( U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))), mu); - +*/ // __ // | // |__ // // + + tmp2 = adj(U[mu]*tmp1)*U[nu]; + staple += Cshift(tmp2, nu, -1); + + /* staple += Gimpl::ShiftStaple( Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); +*/ } } } @@ -289,8 +304,7 @@ public: // staple = Gimpl::ShiftStaple( Gimpl::CovShiftBackward(U[nu], nu, - Gimpl::CovShiftBackward(U[mu], mu, U[nu])), - mu); + Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); } } @@ -307,10 +321,10 @@ public: GaugeMat Vup(Umu._grid), Vdn(Umu._grid); StapleUpper(Vup, Umu, mu, nu); StapleLower(Vdn, Umu, mu, nu); - GaugeMat v = adj(Vup) - adj(Vdn); + GaugeMat v = Vup - Vdn; GaugeMat u = PeekIndex(Umu, mu); // some redundant copies GaugeMat vu = v*u; - FS = 0.25*Ta(u*v + Cshift(vu, mu, +1)); + FS = 0.25*Ta(u*v + Cshift(vu, mu, -1)); } static Real TopologicalCharge(GaugeLorentz &U){ diff --git a/tests/smearing/Test_WilsonFlow.cc b/tests/smearing/Test_WilsonFlow.cc index 4e6bd0af..19ae5575 100644 --- a/tests/smearing/Test_WilsonFlow.cc +++ b/tests/smearing/Test_WilsonFlow.cc @@ -42,22 +42,29 @@ int main(int argc, char **argv) { GridRedBlackCartesian RBGrid(latt_size, simd_layout, mpi_layout); std::vector seeds({1, 2, 3, 4, 5}); + GridSerialRNG sRNG; GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); LatticeGaugeField Umu(&Grid), Uflow(&Grid); SU::HotConfiguration(pRNG, Umu); + CheckpointerParameters CPPar("ckpoint_lat", "ckpoint_rng"); + BinaryHmcCheckpointer CPBin(CPPar); + + CPBin.CheckpointRestore(3000, Umu, sRNG, pRNG); std::cout << std::setprecision(15); std::cout << GridLogMessage << "Plaquette: " << WilsonLoops::avgPlaquette(Umu) << std::endl; - WilsonFlow WF(200, 0.01); + WilsonFlow WF(200, 0.01, 50); WF.smear(Uflow, Umu); RealD WFlow_plaq = WilsonLoops::avgPlaquette(Uflow); - std::cout << GridLogMessage << "Plaquette: "<< WFlow_plaq << std::endl; + RealD WFlow_TC = WilsonLoops::TopologicalCharge(Uflow); + std::cout << GridLogMessage << "Plaquette : "<< WFlow_plaq << std::endl; + std::cout << GridLogMessage << "TopologicalCharge : "<< WFlow_TC << std::endl; std::cout<< GridLogMessage << " Admissibility check:\n"; const double sp_adm = 0.067; // admissible threshold From 0de314870d5d926fb71d25e87dfdd0ba38f89742 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Fri, 26 May 2017 14:31:49 +0100 Subject: [PATCH 12/13] Faster derivative for WilsonGauge --- lib/qcd/action/gauge/WilsonGaugeAction.h | 12 ++++-- lib/qcd/utils/WilsonLoops.h | 47 +++++++++++++++--------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/lib/qcd/action/gauge/WilsonGaugeAction.h b/lib/qcd/action/gauge/WilsonGaugeAction.h index 77c2424c..1ea780b7 100644 --- a/lib/qcd/action/gauge/WilsonGaugeAction.h +++ b/lib/qcd/action/gauge/WilsonGaugeAction.h @@ -71,14 +71,18 @@ class WilsonGaugeAction : public Action { RealD factor = 0.5 * beta / RealD(Nc); - GaugeLinkField Umu(U._grid); + //GaugeLinkField Umu(U._grid); GaugeLinkField dSdU_mu(U._grid); for (int mu = 0; mu < Nd; mu++) { - Umu = PeekIndex(U, mu); + //Umu = PeekIndex(U, mu); // Staple in direction mu - WilsonLoops::Staple(dSdU_mu, U, mu); - dSdU_mu = Ta(Umu * dSdU_mu) * factor; + //WilsonLoops::Staple(dSdU_mu, U, mu); + //dSdU_mu = Ta(Umu * dSdU_mu) * factor; + + + WilsonLoops::StapleMult(dSdU_mu, U, mu); + dSdU_mu = Ta(dSdU_mu) * factor; PokeIndex(dSdU, dSdU_mu, mu); } diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 77b5db95..3c0e9ee4 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -188,13 +188,10 @@ public: } } - ////////////////////////////////////////////////// - // the sum over all staples on each site - ////////////////////////////////////////////////// - static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { +// For the force term +static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { GridBase *grid = Umu._grid; - std::vector U(Nd, grid); for (int d = 0; d < Nd; d++) { // this operation is taking too much time @@ -204,6 +201,31 @@ public: GaugeMat tmp1(grid); GaugeMat tmp2(grid); + for (int nu = 0; nu < Nd; nu++) { + if (nu != mu) { + // this is ~10% faster than the Staple + tmp1 = Cshift(U[nu], mu, 1); + tmp2 = Cshift(U[mu], nu, 1); + staple += tmp1* adj(U[nu]*tmp2); + tmp2 = adj(U[mu]*tmp1)*U[nu]; + staple += Cshift(tmp2, nu, -1); + } + } + staple = U[mu]*staple; +} + + ////////////////////////////////////////////////// + // the sum over all staples on each site + ////////////////////////////////////////////////// + static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { + + GridBase *grid = Umu._grid; + + std::vector U(Nd, grid); + for (int d = 0; d < Nd; d++) { + U[d] = PeekIndex(Umu, d); + } + staple = zero; for (int nu = 0; nu < Nd; nu++) { @@ -217,34 +239,23 @@ public: // | // __| // - tmp1 = Cshift(U[nu], mu, 1); - tmp2 = Cshift(U[mu], nu, 1); - - staple += tmp1*adj(U[nu] * tmp2); - - -/* + staple += Gimpl::ShiftStaple( Gimpl::CovShiftForward( U[nu], nu, Gimpl::CovShiftBackward( U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))), mu); -*/ + // __ // | // |__ // // - tmp2 = adj(U[mu]*tmp1)*U[nu]; - staple += Cshift(tmp2, nu, -1); - - /* staple += Gimpl::ShiftStaple( Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); -*/ } } } From 7c6cc85df6b6f6a43f86099f033aa762777ede55 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Sat, 27 May 2017 18:03:49 +0100 Subject: [PATCH 13/13] Updating WilsonFlow test --- lib/qcd/smearing/WilsonFlow.h | 5 +++ tests/smearing/Test_WilsonFlow.cc | 52 ++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/lib/qcd/smearing/WilsonFlow.h b/lib/qcd/smearing/WilsonFlow.h index 34800d11..00b7622e 100644 --- a/lib/qcd/smearing/WilsonFlow.h +++ b/lib/qcd/smearing/WilsonFlow.h @@ -107,6 +107,9 @@ RealD WilsonFlow::energyDensityPlaquette(unsigned int step, const GaugeFi return 2.0 * td * td * SG.S(U)/U._grid->gSites(); } + +//#define WF_TIMING + template void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const { out = in; @@ -115,7 +118,9 @@ void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const { evolve_step(out); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = end - start; + #ifdef WF_TIMING std::cout << "Time to evolve " << diff.count() << " s\n"; + #endif std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << energyDensityPlaquette(step,out) << std::endl; diff --git a/tests/smearing/Test_WilsonFlow.cc b/tests/smearing/Test_WilsonFlow.cc index 19ae5575..c337bf29 100644 --- a/tests/smearing/Test_WilsonFlow.cc +++ b/tests/smearing/Test_WilsonFlow.cc @@ -28,6 +28,37 @@ directory /* END LEGAL */ #include +namespace Grid{ + struct WFParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters, + int, steps, + double, step_size, + int, meas_interval); + + + template + WFParameters(Reader& Reader){ + read(Reader, "WilsonFlow", *this); + } + + }; + + struct ConfParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(ConfParameters, + std::string, conf_prefix, + std::string, rng_prefix, + int, StartConfiguration, + int, EndConfiguration, + int, Skip); + + template + ConfParameters(Reader& Reader){ + read(Reader, "Configurations", *this); + } + + }; +} + int main(int argc, char **argv) { using namespace Grid; using namespace Grid::QCD; @@ -48,23 +79,30 @@ int main(int argc, char **argv) { LatticeGaugeField Umu(&Grid), Uflow(&Grid); SU::HotConfiguration(pRNG, Umu); - CheckpointerParameters CPPar("ckpoint_lat", "ckpoint_rng"); + + typedef Grid::JSONReader Serialiser; + Serialiser Reader("input.json"); + WFParameters WFPar(Reader); + ConfParameters CPar(Reader); + CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix); BinaryHmcCheckpointer CPBin(CPPar); - CPBin.CheckpointRestore(3000, Umu, sRNG, pRNG); + for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){ + + CPBin.CheckpointRestore(conf, Umu, sRNG, pRNG); std::cout << std::setprecision(15); - std::cout << GridLogMessage << "Plaquette: " + std::cout << GridLogMessage << "Initial plaquette: " << WilsonLoops::avgPlaquette(Umu) << std::endl; - WilsonFlow WF(200, 0.01, 50); + WilsonFlow WF(WFPar.steps, WFPar.step_size, WFPar.meas_interval); WF.smear(Uflow, Umu); RealD WFlow_plaq = WilsonLoops::avgPlaquette(Uflow); RealD WFlow_TC = WilsonLoops::TopologicalCharge(Uflow); - std::cout << GridLogMessage << "Plaquette : "<< WFlow_plaq << std::endl; - std::cout << GridLogMessage << "TopologicalCharge : "<< WFlow_TC << std::endl; + std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl; + std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl; std::cout<< GridLogMessage << " Admissibility check:\n"; const double sp_adm = 0.067; // admissible threshold @@ -80,6 +118,6 @@ int main(int argc, char **argv) { std::cout<< GridLogMessage << " (sp_admissible = "<< sp_adm <<")\n"; //std::cout<< GridLogMessage << " sp_admissible - sp_max = "<